diff options
| author | Laszlo Agocs <laszlo.agocs@qt.io> | 2019-10-21 14:09:05 +0200 |
|---|---|---|
| committer | Laszlo Agocs <laszlo.agocs@qt.io> | 2019-10-23 10:29:49 +0000 |
| commit | 53fc739e3d530a70e5371a08d001bacabc0233de (patch) | |
| tree | 836910be948b6d7702b6944fcf0b7947f576d4b4 | |
| parent | 3ed14d7b0d539f97f2d68c83cc02d6509b24aea7 (diff) | |
Update SPIRV-Cross
Change-Id: I15dbf83057b5aa435b87b80e219b113987735cad
Reviewed-by: Christian Strømme <christian.stromme@qt.io>
22 files changed, 10145 insertions, 2027 deletions
diff --git a/src/3rdparty/SPIRV-Cross/qt_attribution.json b/src/3rdparty/SPIRV-Cross/qt_attribution.json index 70df546..43847a0 100644 --- a/src/3rdparty/SPIRV-Cross/qt_attribution.json +++ b/src/3rdparty/SPIRV-Cross/qt_attribution.json @@ -7,7 +7,7 @@ "QtUsage": "Shader code generation.", "Homepage": "https://github.com/KhronosGroup/SPIRV-Cross", - "Version": "f647e655d489a7699305ada30cda808a7dac079f", + "Version": "ff1897ae0e1fc1e37c604933694477f335ca8e44", "License": "Apache License 2.0", "LicenseId": "Apache-2.0", "LicenseFile": "LICENSE", diff --git a/src/3rdparty/SPIRV-Cross/spirv.h b/src/3rdparty/SPIRV-Cross/spirv.h index 8da27dd..1b67617 100644 --- a/src/3rdparty/SPIRV-Cross/spirv.h +++ b/src/3rdparty/SPIRV-Cross/spirv.h @@ -53,12 +53,12 @@ typedef unsigned int SpvId; -#define SPV_VERSION 0x10300 -#define SPV_REVISION 6 +#define SPV_VERSION 0x10500 +#define SPV_REVISION 1 static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010300; -static const unsigned int SpvRevision = 6; +static const unsigned int SpvVersion = 0x00010400; +static const unsigned int SpvRevision = 1; static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; @@ -95,6 +95,7 @@ typedef enum SpvAddressingModel_ { SpvAddressingModelLogical = 0, SpvAddressingModelPhysical32 = 1, SpvAddressingModelPhysical64 = 2, + SpvAddressingModelPhysicalStorageBuffer64 = 5348, SpvAddressingModelPhysicalStorageBuffer64EXT = 5348, SpvAddressingModelMax = 0x7fffffff, } SpvAddressingModel; @@ -103,6 +104,7 @@ typedef enum SpvMemoryModel_ { SpvMemoryModelSimple = 0, SpvMemoryModelGLSL450 = 1, SpvMemoryModelOpenCL = 2, + SpvMemoryModelVulkan = 3, SpvMemoryModelVulkanKHR = 3, SpvMemoryModelMax = 0x7fffffff, } SpvMemoryModel; @@ -158,6 +160,12 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeDerivativeGroupQuadsNV = 5289, SpvExecutionModeDerivativeGroupLinearNV = 5290, SpvExecutionModeOutputTrianglesNV = 5298, + SpvExecutionModePixelInterlockOrderedEXT = 5366, + SpvExecutionModePixelInterlockUnorderedEXT = 5367, + SpvExecutionModeSampleInterlockOrderedEXT = 5368, + SpvExecutionModeSampleInterlockUnorderedEXT = 5369, + SpvExecutionModeShadingRateInterlockOrderedEXT = 5370, + SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371, SpvExecutionModeMax = 0x7fffffff, } SpvExecutionMode; @@ -181,6 +189,7 @@ typedef enum SpvStorageClass_ { SpvStorageClassHitAttributeNV = 5339, SpvStorageClassIncomingRayPayloadNV = 5342, SpvStorageClassShaderRecordBufferNV = 5343, + SpvStorageClassPhysicalStorageBuffer = 5349, SpvStorageClassPhysicalStorageBufferEXT = 5349, SpvStorageClassMax = 0x7fffffff, } SpvStorageClass; @@ -309,10 +318,16 @@ typedef enum SpvImageOperandsShift_ { SpvImageOperandsConstOffsetsShift = 5, SpvImageOperandsSampleShift = 6, SpvImageOperandsMinLodShift = 7, + SpvImageOperandsMakeTexelAvailableShift = 8, SpvImageOperandsMakeTexelAvailableKHRShift = 8, + SpvImageOperandsMakeTexelVisibleShift = 9, SpvImageOperandsMakeTexelVisibleKHRShift = 9, + SpvImageOperandsNonPrivateTexelShift = 10, SpvImageOperandsNonPrivateTexelKHRShift = 10, + SpvImageOperandsVolatileTexelShift = 11, SpvImageOperandsVolatileTexelKHRShift = 11, + SpvImageOperandsSignExtendShift = 12, + SpvImageOperandsZeroExtendShift = 13, SpvImageOperandsMax = 0x7fffffff, } SpvImageOperandsShift; @@ -326,10 +341,16 @@ typedef enum SpvImageOperandsMask_ { SpvImageOperandsConstOffsetsMask = 0x00000020, SpvImageOperandsSampleMask = 0x00000040, SpvImageOperandsMinLodMask = 0x00000080, + SpvImageOperandsMakeTexelAvailableMask = 0x00000100, SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + SpvImageOperandsMakeTexelVisibleMask = 0x00000200, SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + SpvImageOperandsNonPrivateTexelMask = 0x00000400, SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400, + SpvImageOperandsVolatileTexelMask = 0x00000800, SpvImageOperandsVolatileTexelKHRMask = 0x00000800, + SpvImageOperandsSignExtendMask = 0x00001000, + SpvImageOperandsZeroExtendMask = 0x00002000, } SpvImageOperandsMask; typedef enum SpvFPFastMathModeShift_ { @@ -410,6 +431,7 @@ typedef enum SpvDecoration_ { SpvDecorationNonWritable = 24, SpvDecorationNonReadable = 25, SpvDecorationUniform = 26, + SpvDecorationUniformId = 27, SpvDecorationSaturatedConversion = 28, SpvDecorationStream = 29, SpvDecorationLocation = 30, @@ -441,11 +463,17 @@ typedef enum SpvDecoration_ { SpvDecorationPerViewNV = 5272, SpvDecorationPerTaskNV = 5273, SpvDecorationPerVertexNV = 5285, + SpvDecorationNonUniform = 5300, SpvDecorationNonUniformEXT = 5300, + SpvDecorationRestrictPointer = 5355, SpvDecorationRestrictPointerEXT = 5355, + SpvDecorationAliasedPointer = 5356, SpvDecorationAliasedPointerEXT = 5356, + SpvDecorationCounterBuffer = 5634, SpvDecorationHlslCounterBufferGOOGLE = 5634, SpvDecorationHlslSemanticGOOGLE = 5635, + SpvDecorationUserSemantic = 5635, + SpvDecorationUserTypeGOOGLE = 5636, SpvDecorationMax = 0x7fffffff, } SpvDecoration; @@ -548,6 +576,10 @@ typedef enum SpvBuiltIn_ { SpvBuiltInHitTNV = 5332, SpvBuiltInHitKindNV = 5333, SpvBuiltInIncomingRayFlagsNV = 5351, + SpvBuiltInWarpsPerSMNV = 5374, + SpvBuiltInSMCountNV = 5375, + SpvBuiltInWarpIDNV = 5376, + SpvBuiltInSMIDNV = 5377, SpvBuiltInMax = 0x7fffffff, } SpvBuiltIn; @@ -568,6 +600,11 @@ typedef enum SpvLoopControlShift_ { SpvLoopControlDontUnrollShift = 1, SpvLoopControlDependencyInfiniteShift = 2, SpvLoopControlDependencyLengthShift = 3, + SpvLoopControlMinIterationsShift = 4, + SpvLoopControlMaxIterationsShift = 5, + SpvLoopControlIterationMultipleShift = 6, + SpvLoopControlPeelCountShift = 7, + SpvLoopControlPartialCountShift = 8, SpvLoopControlMax = 0x7fffffff, } SpvLoopControlShift; @@ -577,6 +614,11 @@ typedef enum SpvLoopControlMask_ { SpvLoopControlDontUnrollMask = 0x00000002, SpvLoopControlDependencyInfiniteMask = 0x00000004, SpvLoopControlDependencyLengthMask = 0x00000008, + SpvLoopControlMinIterationsMask = 0x00000010, + SpvLoopControlMaxIterationsMask = 0x00000020, + SpvLoopControlIterationMultipleMask = 0x00000040, + SpvLoopControlPeelCountMask = 0x00000080, + SpvLoopControlPartialCountMask = 0x00000100, } SpvLoopControlMask; typedef enum SpvFunctionControlShift_ { @@ -606,9 +648,13 @@ typedef enum SpvMemorySemanticsShift_ { SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, SpvMemorySemanticsAtomicCounterMemoryShift = 10, SpvMemorySemanticsImageMemoryShift = 11, + SpvMemorySemanticsOutputMemoryShift = 12, SpvMemorySemanticsOutputMemoryKHRShift = 12, + SpvMemorySemanticsMakeAvailableShift = 13, SpvMemorySemanticsMakeAvailableKHRShift = 13, + SpvMemorySemanticsMakeVisibleShift = 14, SpvMemorySemanticsMakeVisibleKHRShift = 14, + SpvMemorySemanticsVolatileShift = 15, SpvMemorySemanticsMax = 0x7fffffff, } SpvMemorySemanticsShift; @@ -624,17 +670,24 @@ typedef enum SpvMemorySemanticsMask_ { SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, SpvMemorySemanticsImageMemoryMask = 0x00000800, + SpvMemorySemanticsOutputMemoryMask = 0x00001000, SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000, + SpvMemorySemanticsMakeAvailableMask = 0x00002000, SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000, + SpvMemorySemanticsMakeVisibleMask = 0x00004000, SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000, + SpvMemorySemanticsVolatileMask = 0x00008000, } SpvMemorySemanticsMask; typedef enum SpvMemoryAccessShift_ { SpvMemoryAccessVolatileShift = 0, SpvMemoryAccessAlignedShift = 1, SpvMemoryAccessNontemporalShift = 2, + SpvMemoryAccessMakePointerAvailableShift = 3, SpvMemoryAccessMakePointerAvailableKHRShift = 3, + SpvMemoryAccessMakePointerVisibleShift = 4, SpvMemoryAccessMakePointerVisibleKHRShift = 4, + SpvMemoryAccessNonPrivatePointerShift = 5, SpvMemoryAccessNonPrivatePointerKHRShift = 5, SpvMemoryAccessMax = 0x7fffffff, } SpvMemoryAccessShift; @@ -644,8 +697,11 @@ typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessVolatileMask = 0x00000001, SpvMemoryAccessAlignedMask = 0x00000002, SpvMemoryAccessNontemporalMask = 0x00000004, + SpvMemoryAccessMakePointerAvailableMask = 0x00000008, SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008, + SpvMemoryAccessMakePointerVisibleMask = 0x00000010, SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, + SpvMemoryAccessNonPrivatePointerMask = 0x00000020, SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, } SpvMemoryAccessMask; @@ -655,6 +711,7 @@ typedef enum SpvScope_ { SpvScopeWorkgroup = 2, SpvScopeSubgroup = 3, SpvScopeInvocation = 4, + SpvScopeQueueFamily = 5, SpvScopeQueueFamilyKHR = 5, SpvScopeMax = 0x7fffffff, } SpvScope; @@ -755,6 +812,8 @@ typedef enum SpvCapability_ { SpvCapabilityGroupNonUniformShuffleRelative = 66, SpvCapabilityGroupNonUniformClustered = 67, SpvCapabilityGroupNonUniformQuad = 68, + SpvCapabilityShaderLayer = 69, + SpvCapabilityShaderViewportIndex = 70, SpvCapabilitySubgroupBallotKHR = 4423, SpvCapabilityDrawParameters = 4427, SpvCapabilitySubgroupVoteKHR = 4431, @@ -783,6 +842,7 @@ typedef enum SpvCapability_ { SpvCapabilityFragmentMaskAMD = 5010, SpvCapabilityStencilExportEXT = 5013, SpvCapabilityImageReadWriteLodAMD = 5015, + SpvCapabilityShaderClockKHR = 5055, SpvCapabilitySampleMaskOverrideCoverageNV = 5249, SpvCapabilityGeometryShaderPassthroughNV = 5251, SpvCapabilityShaderViewportIndexLayerEXT = 5254, @@ -798,28 +858,52 @@ typedef enum SpvCapability_ { SpvCapabilityFragmentDensityEXT = 5291, SpvCapabilityShadingRateNV = 5291, SpvCapabilityGroupNonUniformPartitionedNV = 5297, + SpvCapabilityShaderNonUniform = 5301, SpvCapabilityShaderNonUniformEXT = 5301, + SpvCapabilityRuntimeDescriptorArray = 5302, SpvCapabilityRuntimeDescriptorArrayEXT = 5302, + SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303, SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304, SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305, SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306, SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + SpvCapabilitySampledImageArrayNonUniformIndexing = 5307, SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308, SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + SpvCapabilityStorageImageArrayNonUniformIndexing = 5309, SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310, SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, SpvCapabilityRayTracingNV = 5340, + SpvCapabilityVulkanMemoryModel = 5345, SpvCapabilityVulkanMemoryModelKHR = 5345, + SpvCapabilityVulkanMemoryModelDeviceScope = 5346, SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + SpvCapabilityPhysicalStorageBufferAddresses = 5347, SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347, SpvCapabilityComputeDerivativeGroupLinearNV = 5350, SpvCapabilityCooperativeMatrixNV = 5357, + SpvCapabilityFragmentShaderSampleInterlockEXT = 5363, + SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372, + SpvCapabilityShaderSMBuiltinsNV = 5373, + SpvCapabilityFragmentShaderPixelInterlockEXT = 5378, + SpvCapabilityDemoteToHelperInvocationEXT = 5379, SpvCapabilitySubgroupShuffleINTEL = 5568, SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, SpvCapabilitySubgroupImageBlockIOINTEL = 5570, SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579, + SpvCapabilityIntegerFunctions2INTEL = 5584, + SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696, + SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, SpvCapabilityMax = 0x7fffffff, } SpvCapability; @@ -1164,6 +1248,10 @@ typedef enum SpvOp_ { SpvOpGroupNonUniformLogicalXor = 364, SpvOpGroupNonUniformQuadBroadcast = 365, SpvOpGroupNonUniformQuadSwap = 366, + SpvOpCopyLogical = 400, + SpvOpPtrEqual = 401, + SpvOpPtrNotEqual = 402, + SpvOpPtrDiff = 403, SpvOpSubgroupBallotKHR = 4421, SpvOpSubgroupFirstInvocationKHR = 4422, SpvOpSubgroupAllKHR = 4428, @@ -1180,6 +1268,7 @@ typedef enum SpvOp_ { SpvOpGroupSMaxNonUniformAMD = 5007, SpvOpFragmentMaskFetchAMD = 5011, SpvOpFragmentFetchAMD = 5012, + SpvOpReadClockKHR = 5056, SpvOpImageSampleFootprintNV = 5283, SpvOpGroupNonUniformPartitionNV = 5296, SpvOpWritePackedPrimitiveIndices4x8NV = 5299, @@ -1194,6 +1283,10 @@ typedef enum SpvOp_ { SpvOpCooperativeMatrixStoreNV = 5360, SpvOpCooperativeMatrixMulAddNV = 5361, SpvOpCooperativeMatrixLengthNV = 5362, + SpvOpBeginInvocationInterlockEXT = 5364, + SpvOpEndInvocationInterlockEXT = 5365, + SpvOpDemoteToHelperInvocationEXT = 5380, + SpvOpIsHelperInvocationEXT = 5381, SpvOpSubgroupShuffleINTEL = 5571, SpvOpSubgroupShuffleDownINTEL = 5572, SpvOpSubgroupShuffleUpINTEL = 5573, @@ -1204,10 +1297,676 @@ typedef enum SpvOp_ { SpvOpSubgroupImageBlockWriteINTEL = 5578, SpvOpSubgroupImageMediaBlockReadINTEL = 5580, SpvOpSubgroupImageMediaBlockWriteINTEL = 5581, + SpvOpUCountLeadingZerosINTEL = 5585, + SpvOpUCountTrailingZerosINTEL = 5586, + SpvOpAbsISubINTEL = 5587, + SpvOpAbsUSubINTEL = 5588, + SpvOpIAddSatINTEL = 5589, + SpvOpUAddSatINTEL = 5590, + SpvOpIAverageINTEL = 5591, + SpvOpUAverageINTEL = 5592, + SpvOpIAverageRoundedINTEL = 5593, + SpvOpUAverageRoundedINTEL = 5594, + SpvOpISubSatINTEL = 5595, + SpvOpUSubSatINTEL = 5596, + SpvOpIMul32x16INTEL = 5597, + SpvOpUMul32x16INTEL = 5598, + SpvOpDecorateString = 5632, SpvOpDecorateStringGOOGLE = 5632, + SpvOpMemberDecorateString = 5633, SpvOpMemberDecorateStringGOOGLE = 5633, + SpvOpVmeImageINTEL = 5699, + SpvOpTypeVmeImageINTEL = 5700, + SpvOpTypeAvcImePayloadINTEL = 5701, + SpvOpTypeAvcRefPayloadINTEL = 5702, + SpvOpTypeAvcSicPayloadINTEL = 5703, + SpvOpTypeAvcMcePayloadINTEL = 5704, + SpvOpTypeAvcMceResultINTEL = 5705, + SpvOpTypeAvcImeResultINTEL = 5706, + SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710, + SpvOpTypeAvcRefResultINTEL = 5711, + SpvOpTypeAvcSicResultINTEL = 5712, + SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733, + SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735, + SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737, + SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + SpvOpSubgroupAvcImeInitializeINTEL = 5747, + SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749, + SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750, + SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765, + SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776, + SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + SpvOpSubgroupAvcFmeInitializeINTEL = 5781, + SpvOpSubgroupAvcBmeInitializeINTEL = 5782, + SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790, + SpvOpSubgroupAvcSicInitializeINTEL = 5791, + SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792, + SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803, + SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808, + SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816, SpvOpMax = 0x7fffffff, } SpvOp; +#ifdef SPV_ENABLE_UTILITY_CODE +inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case SpvOpNop: *hasResult = false; *hasResultType = false; break; + case SpvOpUndef: *hasResult = true; *hasResultType = true; break; + case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break; + case SpvOpSource: *hasResult = false; *hasResultType = false; break; + case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpName: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberName: *hasResult = false; *hasResultType = false; break; + case SpvOpString: *hasResult = true; *hasResultType = false; break; + case SpvOpLine: *hasResult = false; *hasResultType = false; break; + case SpvOpExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break; + case SpvOpExtInst: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break; + case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break; + case SpvOpCapability: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeInt: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case SpvOpFunction: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break; + case SpvOpVariable: *hasResult = true; *hasResultType = true; break; + case SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case SpvOpLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpStore: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break; + case SpvOpTranspose: *hasResult = true; *hasResultType = true; break; + case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageRead: *hasResult = true; *hasResultType = true; break; + case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break; + case SpvOpImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break; + case SpvOpUConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpSConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpFConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case SpvOpBitcast: *hasResult = true; *hasResultType = true; break; + case SpvOpSNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpFNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpISub: *hasResult = true; *hasResultType = true; break; + case SpvOpFSub: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpUDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpSDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpFDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpUMod: *hasResult = true; *hasResultType = true; break; + case SpvOpSRem: *hasResult = true; *hasResultType = true; break; + case SpvOpSMod: *hasResult = true; *hasResultType = true; break; + case SpvOpFRem: *hasResult = true; *hasResultType = true; break; + case SpvOpFMod: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break; + case SpvOpDot: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break; + case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break; + case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpAny: *hasResult = true; *hasResultType = true; break; + case SpvOpAll: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNan: *hasResult = true; *hasResultType = true; break; + case SpvOpIsInf: *hasResult = true; *hasResultType = true; break; + case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break; + case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break; + case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case SpvOpOrdered: *hasResult = true; *hasResultType = true; break; + case SpvOpUnordered: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break; + case SpvOpSelect: *hasResult = true; *hasResultType = true; break; + case SpvOpIEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpINotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpNot: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break; + case SpvOpBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdx: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdy: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidth: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break; + case SpvOpPhi: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpLabel: *hasResult = true; *hasResultType = false; break; + case SpvOpBranch: *hasResult = false; *hasResultType = false; break; + case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break; + case SpvOpSwitch: *hasResult = false; *hasResultType = false; break; + case SpvOpKill: *hasResult = false; *hasResultType = false; break; + case SpvOpReturn: *hasResult = false; *hasResultType = false; break; + case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break; + case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case SpvOpNoLine: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break; + case SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case SpvOpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + #endif diff --git a/src/3rdparty/SPIRV-Cross/spirv.hpp b/src/3rdparty/SPIRV-Cross/spirv.hpp index adc13de..f5cbda1 100644 --- a/src/3rdparty/SPIRV-Cross/spirv.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv.hpp @@ -49,12 +49,12 @@ namespace spv { typedef unsigned int Id; -#define SPV_VERSION 0x10300 -#define SPV_REVISION 6 +#define SPV_VERSION 0x10500 +#define SPV_REVISION 1 static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010300; -static const unsigned int Revision = 6; +static const unsigned int Version = 0x00010400; +static const unsigned int Revision = 1; static const unsigned int OpCodeMask = 0xffff; static const unsigned int WordCountShift = 16; @@ -91,6 +91,7 @@ enum AddressingModel { AddressingModelLogical = 0, AddressingModelPhysical32 = 1, AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, AddressingModelPhysicalStorageBuffer64EXT = 5348, AddressingModelMax = 0x7fffffff, }; @@ -99,6 +100,7 @@ enum MemoryModel { MemoryModelSimple = 0, MemoryModelGLSL450 = 1, MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, MemoryModelVulkanKHR = 3, MemoryModelMax = 0x7fffffff, }; @@ -154,6 +156,12 @@ enum ExecutionMode { ExecutionModeDerivativeGroupQuadsNV = 5289, ExecutionModeDerivativeGroupLinearNV = 5290, ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, ExecutionModeMax = 0x7fffffff, }; @@ -177,6 +185,7 @@ enum StorageClass { StorageClassHitAttributeNV = 5339, StorageClassIncomingRayPayloadNV = 5342, StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, StorageClassPhysicalStorageBufferEXT = 5349, StorageClassMax = 0x7fffffff, }; @@ -305,10 +314,16 @@ enum ImageOperandsShift { ImageOperandsConstOffsetsShift = 5, ImageOperandsSampleShift = 6, ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, ImageOperandsMax = 0x7fffffff, }; @@ -322,10 +337,16 @@ enum ImageOperandsMask { ImageOperandsConstOffsetsMask = 0x00000020, ImageOperandsSampleMask = 0x00000040, ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, }; enum FPFastMathModeShift { @@ -406,6 +427,7 @@ enum Decoration { DecorationNonWritable = 24, DecorationNonReadable = 25, DecorationUniform = 26, + DecorationUniformId = 27, DecorationSaturatedConversion = 28, DecorationStream = 29, DecorationLocation = 30, @@ -437,11 +459,17 @@ enum Decoration { DecorationPerViewNV = 5272, DecorationPerTaskNV = 5273, DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, DecorationAliasedPointerEXT = 5356, + DecorationCounterBuffer = 5634, DecorationHlslCounterBufferGOOGLE = 5634, DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, DecorationMax = 0x7fffffff, }; @@ -544,6 +572,10 @@ enum BuiltIn { BuiltInHitTNV = 5332, BuiltInHitKindNV = 5333, BuiltInIncomingRayFlagsNV = 5351, + BuiltInWarpsPerSMNV = 5374, + BuiltInSMCountNV = 5375, + BuiltInWarpIDNV = 5376, + BuiltInSMIDNV = 5377, BuiltInMax = 0x7fffffff, }; @@ -564,6 +596,11 @@ enum LoopControlShift { LoopControlDontUnrollShift = 1, LoopControlDependencyInfiniteShift = 2, LoopControlDependencyLengthShift = 3, + LoopControlMinIterationsShift = 4, + LoopControlMaxIterationsShift = 5, + LoopControlIterationMultipleShift = 6, + LoopControlPeelCountShift = 7, + LoopControlPartialCountShift = 8, LoopControlMax = 0x7fffffff, }; @@ -573,6 +610,11 @@ enum LoopControlMask { LoopControlDontUnrollMask = 0x00000002, LoopControlDependencyInfiniteMask = 0x00000004, LoopControlDependencyLengthMask = 0x00000008, + LoopControlMinIterationsMask = 0x00000010, + LoopControlMaxIterationsMask = 0x00000020, + LoopControlIterationMultipleMask = 0x00000040, + LoopControlPeelCountMask = 0x00000080, + LoopControlPartialCountMask = 0x00000100, }; enum FunctionControlShift { @@ -602,9 +644,13 @@ enum MemorySemanticsShift { MemorySemanticsCrossWorkgroupMemoryShift = 9, MemorySemanticsAtomicCounterMemoryShift = 10, MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableShift = 13, MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, MemorySemanticsMakeVisibleKHRShift = 14, + MemorySemanticsVolatileShift = 15, MemorySemanticsMax = 0x7fffffff, }; @@ -620,17 +666,24 @@ enum MemorySemanticsMask { MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, MemorySemanticsAtomicCounterMemoryMask = 0x00000400, MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, MemorySemanticsMakeVisibleKHRMask = 0x00004000, + MemorySemanticsVolatileMask = 0x00008000, }; enum MemoryAccessShift { MemoryAccessVolatileShift = 0, MemoryAccessAlignedShift = 1, MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, MemoryAccessNonPrivatePointerKHRShift = 5, MemoryAccessMax = 0x7fffffff, }; @@ -640,8 +693,11 @@ enum MemoryAccessMask { MemoryAccessVolatileMask = 0x00000001, MemoryAccessAlignedMask = 0x00000002, MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, MemoryAccessNonPrivatePointerKHRMask = 0x00000020, }; @@ -651,6 +707,7 @@ enum Scope { ScopeWorkgroup = 2, ScopeSubgroup = 3, ScopeInvocation = 4, + ScopeQueueFamily = 5, ScopeQueueFamilyKHR = 5, ScopeMax = 0x7fffffff, }; @@ -751,6 +808,8 @@ enum Capability { CapabilityGroupNonUniformShuffleRelative = 66, CapabilityGroupNonUniformClustered = 67, CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, CapabilitySubgroupBallotKHR = 4423, CapabilityDrawParameters = 4427, CapabilitySubgroupVoteKHR = 4431, @@ -779,6 +838,7 @@ enum Capability { CapabilityFragmentMaskAMD = 5010, CapabilityStencilExportEXT = 5013, CapabilityImageReadWriteLodAMD = 5015, + CapabilityShaderClockKHR = 5055, CapabilitySampleMaskOverrideCoverageNV = 5249, CapabilityGeometryShaderPassthroughNV = 5251, CapabilityShaderViewportIndexLayerEXT = 5254, @@ -794,27 +854,52 @@ enum Capability { CapabilityFragmentDensityEXT = 5291, CapabilityShadingRateNV = 5291, CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, CapabilityRayTracingNV = 5340, + CapabilityVulkanMemoryModel = 5345, CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, CapabilityPhysicalStorageBufferAddressesEXT = 5347, CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilityCooperativeMatrixNV = 5357, + CapabilityFragmentShaderSampleInterlockEXT = 5363, + CapabilityFragmentShaderShadingRateInterlockEXT = 5372, + CapabilityShaderSMBuiltinsNV = 5373, + CapabilityFragmentShaderPixelInterlockEXT = 5378, + CapabilityDemoteToHelperInvocationEXT = 5379, CapabilitySubgroupShuffleINTEL = 5568, CapabilitySubgroupBufferBlockIOINTEL = 5569, CapabilitySubgroupImageBlockIOINTEL = 5570, CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityIntegerFunctions2INTEL = 5584, + CapabilitySubgroupAvcMotionEstimationINTEL = 5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, CapabilityMax = 0x7fffffff, }; @@ -1159,6 +1244,10 @@ enum Op { OpGroupNonUniformLogicalXor = 364, OpGroupNonUniformQuadBroadcast = 365, OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, OpSubgroupBallotKHR = 4421, OpSubgroupFirstInvocationKHR = 4422, OpSubgroupAllKHR = 4428, @@ -1175,6 +1264,7 @@ enum Op { OpGroupSMaxNonUniformAMD = 5007, OpFragmentMaskFetchAMD = 5011, OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, OpImageSampleFootprintNV = 5283, OpGroupNonUniformPartitionNV = 5296, OpWritePackedPrimitiveIndices4x8NV = 5299, @@ -1184,6 +1274,15 @@ enum Op { OpTraceNV = 5337, OpTypeAccelerationStructureNV = 5341, OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, OpSubgroupShuffleINTEL = 5571, OpSubgroupShuffleDownINTEL = 5572, OpSubgroupShuffleUpINTEL = 5573, @@ -1194,11 +1293,677 @@ enum Op { OpSubgroupImageBlockWriteINTEL = 5578, OpSubgroupImageMediaBlockReadINTEL = 5580, OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpDecorateString = 5632, OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, OpMax = 0x7fffffff, }; +#ifdef SPV_ENABLE_UTILITY_CODE +inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case OpNop: *hasResult = false; *hasResultType = false; break; + case OpUndef: *hasResult = true; *hasResultType = true; break; + case OpSourceContinued: *hasResult = false; *hasResultType = false; break; + case OpSource: *hasResult = false; *hasResultType = false; break; + case OpSourceExtension: *hasResult = false; *hasResultType = false; break; + case OpName: *hasResult = false; *hasResultType = false; break; + case OpMemberName: *hasResult = false; *hasResultType = false; break; + case OpString: *hasResult = true; *hasResultType = false; break; + case OpLine: *hasResult = false; *hasResultType = false; break; + case OpExtension: *hasResult = false; *hasResultType = false; break; + case OpExtInstImport: *hasResult = true; *hasResultType = false; break; + case OpExtInst: *hasResult = true; *hasResultType = true; break; + case OpMemoryModel: *hasResult = false; *hasResultType = false; break; + case OpEntryPoint: *hasResult = false; *hasResultType = false; break; + case OpExecutionMode: *hasResult = false; *hasResultType = false; break; + case OpCapability: *hasResult = false; *hasResultType = false; break; + case OpTypeVoid: *hasResult = true; *hasResultType = false; break; + case OpTypeBool: *hasResult = true; *hasResultType = false; break; + case OpTypeInt: *hasResult = true; *hasResultType = false; break; + case OpTypeFloat: *hasResult = true; *hasResultType = false; break; + case OpTypeVector: *hasResult = true; *hasResultType = false; break; + case OpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case OpTypeImage: *hasResult = true; *hasResultType = false; break; + case OpTypeSampler: *hasResult = true; *hasResultType = false; break; + case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case OpTypeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeStruct: *hasResult = true; *hasResultType = false; break; + case OpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case OpTypePointer: *hasResult = true; *hasResultType = false; break; + case OpTypeFunction: *hasResult = true; *hasResultType = false; break; + case OpTypeEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case OpTypeQueue: *hasResult = true; *hasResultType = false; break; + case OpTypePipe: *hasResult = true; *hasResultType = false; break; + case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case OpConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpConstant: *hasResult = true; *hasResultType = true; break; + case OpConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpConstantSampler: *hasResult = true; *hasResultType = true; break; + case OpConstantNull: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpSpecConstant: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case OpFunction: *hasResult = true; *hasResultType = true; break; + case OpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case OpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case OpFunctionCall: *hasResult = true; *hasResultType = true; break; + case OpVariable: *hasResult = true; *hasResultType = true; break; + case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case OpLoad: *hasResult = true; *hasResultType = true; break; + case OpStore: *hasResult = false; *hasResultType = false; break; + case OpCopyMemory: *hasResult = false; *hasResultType = false; break; + case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case OpAccessChain: *hasResult = true; *hasResultType = true; break; + case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpArrayLength: *hasResult = true; *hasResultType = true; break; + case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpDecorate: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case OpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case OpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case OpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case OpCopyObject: *hasResult = true; *hasResultType = true; break; + case OpTranspose: *hasResult = true; *hasResultType = true; break; + case OpSampledImage: *hasResult = true; *hasResultType = true; break; + case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageFetch: *hasResult = true; *hasResultType = true; break; + case OpImageGather: *hasResult = true; *hasResultType = true; break; + case OpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageRead: *hasResult = true; *hasResultType = true; break; + case OpImageWrite: *hasResult = false; *hasResultType = false; break; + case OpImage: *hasResult = true; *hasResultType = true; break; + case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case OpConvertFToU: *hasResult = true; *hasResultType = true; break; + case OpConvertFToS: *hasResult = true; *hasResultType = true; break; + case OpConvertSToF: *hasResult = true; *hasResultType = true; break; + case OpConvertUToF: *hasResult = true; *hasResultType = true; break; + case OpUConvert: *hasResult = true; *hasResultType = true; break; + case OpSConvert: *hasResult = true; *hasResultType = true; break; + case OpFConvert: *hasResult = true; *hasResultType = true; break; + case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case OpBitcast: *hasResult = true; *hasResultType = true; break; + case OpSNegate: *hasResult = true; *hasResultType = true; break; + case OpFNegate: *hasResult = true; *hasResultType = true; break; + case OpIAdd: *hasResult = true; *hasResultType = true; break; + case OpFAdd: *hasResult = true; *hasResultType = true; break; + case OpISub: *hasResult = true; *hasResultType = true; break; + case OpFSub: *hasResult = true; *hasResultType = true; break; + case OpIMul: *hasResult = true; *hasResultType = true; break; + case OpFMul: *hasResult = true; *hasResultType = true; break; + case OpUDiv: *hasResult = true; *hasResultType = true; break; + case OpSDiv: *hasResult = true; *hasResultType = true; break; + case OpFDiv: *hasResult = true; *hasResultType = true; break; + case OpUMod: *hasResult = true; *hasResultType = true; break; + case OpSRem: *hasResult = true; *hasResultType = true; break; + case OpSMod: *hasResult = true; *hasResultType = true; break; + case OpFRem: *hasResult = true; *hasResultType = true; break; + case OpFMod: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpOuterProduct: *hasResult = true; *hasResultType = true; break; + case OpDot: *hasResult = true; *hasResultType = true; break; + case OpIAddCarry: *hasResult = true; *hasResultType = true; break; + case OpISubBorrow: *hasResult = true; *hasResultType = true; break; + case OpUMulExtended: *hasResult = true; *hasResultType = true; break; + case OpSMulExtended: *hasResult = true; *hasResultType = true; break; + case OpAny: *hasResult = true; *hasResultType = true; break; + case OpAll: *hasResult = true; *hasResultType = true; break; + case OpIsNan: *hasResult = true; *hasResultType = true; break; + case OpIsInf: *hasResult = true; *hasResultType = true; break; + case OpIsFinite: *hasResult = true; *hasResultType = true; break; + case OpIsNormal: *hasResult = true; *hasResultType = true; break; + case OpSignBitSet: *hasResult = true; *hasResultType = true; break; + case OpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case OpOrdered: *hasResult = true; *hasResultType = true; break; + case OpUnordered: *hasResult = true; *hasResultType = true; break; + case OpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpLogicalNot: *hasResult = true; *hasResultType = true; break; + case OpSelect: *hasResult = true; *hasResultType = true; break; + case OpIEqual: *hasResult = true; *hasResultType = true; break; + case OpINotEqual: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpULessThan: *hasResult = true; *hasResultType = true; break; + case OpSLessThan: *hasResult = true; *hasResultType = true; break; + case OpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case OpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpNot: *hasResult = true; *hasResultType = true; break; + case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case OpBitReverse: *hasResult = true; *hasResultType = true; break; + case OpBitCount: *hasResult = true; *hasResultType = true; break; + case OpDPdx: *hasResult = true; *hasResultType = true; break; + case OpDPdy: *hasResult = true; *hasResultType = true; break; + case OpFwidth: *hasResult = true; *hasResultType = true; break; + case OpDPdxFine: *hasResult = true; *hasResultType = true; break; + case OpDPdyFine: *hasResult = true; *hasResultType = true; break; + case OpFwidthFine: *hasResult = true; *hasResultType = true; break; + case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case OpEmitVertex: *hasResult = false; *hasResultType = false; break; + case OpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case OpControlBarrier: *hasResult = false; *hasResultType = false; break; + case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case OpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case OpAtomicStore: *hasResult = false; *hasResultType = false; break; + case OpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case OpPhi: *hasResult = true; *hasResultType = true; break; + case OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case OpLabel: *hasResult = true; *hasResultType = false; break; + case OpBranch: *hasResult = false; *hasResultType = false; break; + case OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case OpSwitch: *hasResult = false; *hasResultType = false; break; + case OpKill: *hasResult = false; *hasResultType = false; break; + case OpReturn: *hasResult = false; *hasResultType = false; break; + case OpReturnValue: *hasResult = false; *hasResultType = false; break; + case OpUnreachable: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case OpGroupAll: *hasResult = true; *hasResultType = true; break; + case OpGroupAny: *hasResult = true; *hasResultType = true; break; + case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case OpReadPipe: *hasResult = true; *hasResultType = true; break; + case OpWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case OpNoLine: *hasResult = false; *hasResultType = false; break; + case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case OpSizeOf: *hasResult = true; *hasResultType = true; break; + case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case OpDecorateId: *hasResult = false; *hasResultType = false; break; + case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case OpTraceNV: *hasResult = false; *hasResultType = false; break; + case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break; + case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpDecorateString: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + // Overload operator| for mask bit combining inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp index 2f3cf25..463c756 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp @@ -61,7 +61,7 @@ void CFG::build_immediate_dominators() if (immediate_dominators[block]) { assert(immediate_dominators[edge]); - immediate_dominators[block] = find_common_dominator(block, edge); + immediate_dominators[block] = find_common_dominator(immediate_dominators[block], edge); } else immediate_dominators[block] = edge; @@ -74,8 +74,14 @@ bool CFG::is_back_edge(uint32_t to) const // We have a back edge if the visit order is set with the temporary magic value 0. // Crossing edges will have already been recorded with a visit order. auto itr = visit_order.find(to); - assert(itr != end(visit_order)); - return itr->second.get() == 0; + return itr != end(visit_order) && itr->second.get() == 0; +} + +bool CFG::has_visited_forward_edge(uint32_t to) const +{ + // If > 0, we have visited the edge already, and this is not a back edge branch. + auto itr = visit_order.find(to); + return itr != end(visit_order) && itr->second.get() > 0; } bool CFG::post_order_visit(uint32_t block_id) @@ -83,14 +89,30 @@ bool CFG::post_order_visit(uint32_t block_id) // If we have already branched to this block (back edge), stop recursion. // If our branches are back-edges, we do not record them. // We have to record crossing edges however. - if (visit_order[block_id].get() >= 0) - return !is_back_edge(block_id); + if (has_visited_forward_edge(block_id)) + return true; + else if (is_back_edge(block_id)) + return false; // Block back-edges from recursively revisiting ourselves. visit_order[block_id].get() = 0; - // First visit our branch targets. auto &block = compiler.get<SPIRBlock>(block_id); + + // If this is a loop header, add an implied branch to the merge target. + // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. + // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. + // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. + // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. + + // Make a point out of visiting merge target first. This is to make sure that post visit order outside the loop + // is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis. + // For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine, + // but for loops, only the header might end up actually branching to merge block. + if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) + add_branch(block_id, block.merge_block); + + // First visit our branch targets. switch (block.terminator) { case SPIRBlock::Direct: @@ -119,12 +141,56 @@ bool CFG::post_order_visit(uint32_t block_id) break; } - // If this is a loop header, add an implied branch to the merge target. - // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. - // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. - // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. - if (block.merge == SPIRBlock::MergeLoop) - add_branch(block_id, block.merge_block); + // If this is a selection merge, add an implied branch to the merge target. + // This is needed to avoid cases where an inner branch dominates the outer branch. + // This can happen if one of the branches exit early, e.g.: + // if (cond) { ...; break; } else { var = 100 } use_var(var); + // We can use the variable without a Phi since there is only one possible parent here. + // However, in this case, we need to hoist out the inner variable to outside the branch. + // Use same strategy as loops. + if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block)) + { + // If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup. + // Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement + // will be hoisted out to outside the selection merge. + // If size > 1, the variable will be automatically hoisted, so we should not mess with it. + // The exception here is switch blocks, where we can have multiple edges to merge block, + // all coming from same scope, so be more conservative in this case. + // Adding fake branches unconditionally breaks parameter preservation analysis, + // which looks at how variables are accessed through the CFG. + auto pred_itr = preceding_edges.find(block.next_block); + if (pred_itr != end(preceding_edges)) + { + auto &pred = pred_itr->second; + auto succ_itr = succeeding_edges.find(block_id); + size_t num_succeeding_edges = 0; + if (succ_itr != end(succeeding_edges)) + num_succeeding_edges = succ_itr->second.size(); + + if (block.terminator == SPIRBlock::MultiSelect && num_succeeding_edges == 1) + { + // Multiple branches can come from the same scope due to "break;", so we need to assume that all branches + // come from same case scope in worst case, even if there are multiple preceding edges. + // If we have more than one succeeding edge from the block header, it should be impossible + // to have a dominator be inside the block. + // Only case this can go wrong is if we have 2 or more edges from block header and + // 2 or more edges to merge block, and still have dominator be inside a case label. + if (!pred.empty()) + add_branch(block_id, block.next_block); + } + else + { + if (pred.size() == 1 && *pred.begin() != block_id) + add_branch(block_id, block.next_block); + } + } + else + { + // If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it. + // We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge. + add_branch(block_id, block.next_block); + } + } // Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges. visit_order[block_id].get() = ++visit_count; @@ -152,6 +218,111 @@ void CFG::add_branch(uint32_t from, uint32_t to) add_unique(succeeding_edges[from], to); } +uint32_t CFG::find_loop_dominator(uint32_t block_id) const +{ + while (block_id != SPIRBlock::NoDominator) + { + auto itr = preceding_edges.find(block_id); + if (itr == end(preceding_edges)) + return SPIRBlock::NoDominator; + if (itr->second.empty()) + return SPIRBlock::NoDominator; + + uint32_t pred_block_id = SPIRBlock::NoDominator; + bool ignore_loop_header = false; + + // If we are a merge block, go directly to the header block. + // Only consider a loop dominator if we are branching from inside a block to a loop header. + // NOTE: In the CFG we forced an edge from header to merge block always to support variable scopes properly. + for (auto &pred : itr->second) + { + auto &pred_block = compiler.get<SPIRBlock>(pred); + if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == ID(block_id)) + { + pred_block_id = pred; + ignore_loop_header = true; + break; + } + else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == ID(block_id)) + { + pred_block_id = pred; + break; + } + } + + // No merge block means we can just pick any edge. Loop headers dominate the inner loop, so any path we + // take will lead there. + if (pred_block_id == SPIRBlock::NoDominator) + pred_block_id = itr->second.front(); + + block_id = pred_block_id; + + if (!ignore_loop_header && block_id) + { + auto &block = compiler.get<SPIRBlock>(block_id); + if (block.merge == SPIRBlock::MergeLoop) + return block_id; + } + } + + return block_id; +} + +bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const +{ + // Walk backwards, starting from "to" block. + // Only follow pred edges if they have a 1:1 relationship, or a merge relationship. + // If we cannot find a path to "from", we must assume that to is inside control flow in some way. + + auto &from_block = compiler.get<SPIRBlock>(from); + BlockID ignore_block_id = 0; + if (from_block.merge == SPIRBlock::MergeLoop) + ignore_block_id = from_block.merge_block; + + while (to != from) + { + auto pred_itr = preceding_edges.find(to); + if (pred_itr == end(preceding_edges)) + return false; + + DominatorBuilder builder(*this); + for (auto &edge : pred_itr->second) + builder.add_block(edge); + + uint32_t dominator = builder.get_dominator(); + if (dominator == 0) + return false; + + auto &dom = compiler.get<SPIRBlock>(dominator); + + bool true_path_ignore = false; + bool false_path_ignore = false; + if (ignore_block_id && dom.terminator == SPIRBlock::Select) + { + auto &true_block = compiler.get<SPIRBlock>(dom.true_block); + auto &false_block = compiler.get<SPIRBlock>(dom.false_block); + auto &ignore_block = compiler.get<SPIRBlock>(ignore_block_id); + true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block); + false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block); + } + + if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) || + (dom.terminator == SPIRBlock::Direct && dom.next_block == to) || + (dom.terminator == SPIRBlock::Select && dom.true_block == to && false_path_ignore) || + (dom.terminator == SPIRBlock::Select && dom.false_block == to && true_path_ignore)) + { + // Allow walking selection constructs if the other branch reaches out of a loop construct. + // It cannot be in-scope anymore. + to = dominator; + } + else + return false; + } + + return true; +} + DominatorBuilder::DominatorBuilder(const CFG &cfg_) : cfg(cfg_) { diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp index be10371..5f62cca 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp @@ -88,11 +88,17 @@ public: return; seen_blocks.insert(block); - op(block); - for (auto b : get_succeeding_edges(block)) - walk_from(seen_blocks, b, op); + if (op(block)) + { + for (auto b : get_succeeding_edges(block)) + walk_from(seen_blocks, b, op); + } } + uint32_t find_loop_dominator(uint32_t block) const; + + bool node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const; + private: struct VisitOrder { @@ -125,6 +131,7 @@ private: uint32_t visit_count = 0; bool is_back_edge(uint32_t to) const; + bool has_visited_forward_edge(uint32_t to) const; }; class DominatorBuilder diff --git a/src/3rdparty/SPIRV-Cross/spirv_common.hpp b/src/3rdparty/SPIRV-Cross/spirv_common.hpp index 0cf1f56..c1c6fc8 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_common.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_common.hpp @@ -20,6 +20,7 @@ #include "spirv.hpp" #include "spirv_cross_containers.hpp" #include "spirv_cross_error_handling.hpp" +#include <functional> // A bit crude, but allows projects which embed SPIRV-Cross statically to // effectively hide all the symbols from other projects. @@ -183,14 +184,14 @@ std::string join(Ts &&... ts) return stream.str(); } -inline std::string merge(const SmallVector<std::string> &list) +inline std::string merge(const SmallVector<std::string> &list, const char *between = ", ") { StringStream<> stream; for (auto &elem : list) { stream << elem; if (&elem != &list.back()) - stream << ", "; + stream << between; } return stream.str(); } @@ -270,20 +271,6 @@ struct Instruction uint32_t length = 0; }; -// Helper for Variant interface. -struct IVariant -{ - virtual ~IVariant() = default; - virtual IVariant *clone(ObjectPoolBase *pool) = 0; - uint32_t self = 0; -}; - -#define SPIRV_CROSS_DECLARE_CLONE(T) \ - IVariant *clone(ObjectPoolBase *pool) override \ - { \ - return static_cast<ObjectPool<T> *>(pool)->allocate(*this); \ - } - enum Types { TypeNone, @@ -299,9 +286,140 @@ enum Types TypeCombinedImageSampler, TypeAccessChain, TypeUndef, + TypeString, TypeCount }; +template <Types type> +class TypedID; + +template <> +class TypedID<TypeNone> +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + template <Types U> + TypedID(const TypedID<U> &other) + { + *this = other; + } + + template <Types U> + TypedID &operator=(const TypedID<U> &other) + { + id = uint32_t(other); + return *this; + } + + // Implicit conversion to u32 is desired here. + // As long as we block implicit conversion between TypedID<A> and TypedID<B> we're good. + operator uint32_t() const + { + return id; + } + + template <Types U> + operator TypedID<U>() const + { + return TypedID<U>(*this); + } + + bool operator==(const TypedID &other) const + { + return id == other.id; + } + + bool operator!=(const TypedID &other) const + { + return id != other.id; + } + + template <Types type> + bool operator==(const TypedID<type> &other) const + { + return id == uint32_t(other); + } + + template <Types type> + bool operator!=(const TypedID<type> &other) const + { + return id != uint32_t(other); + } + +private: + uint32_t id = 0; +}; + +template <Types type> +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + explicit TypedID(const TypedID<TypeNone> &other) + : id(uint32_t(other)) + { + } + + operator uint32_t() const + { + return id; + } + + bool operator==(const TypedID &other) const + { + return id == other.id; + } + + bool operator!=(const TypedID &other) const + { + return id != other.id; + } + + bool operator==(const TypedID<TypeNone> &other) const + { + return id == uint32_t(other); + } + + bool operator!=(const TypedID<TypeNone> &other) const + { + return id != uint32_t(other); + } + +private: + uint32_t id = 0; +}; + +using VariableID = TypedID<TypeVariable>; +using TypeID = TypedID<TypeType>; +using ConstantID = TypedID<TypeConstant>; +using FunctionID = TypedID<TypeFunction>; +using BlockID = TypedID<TypeBlock>; +using ID = TypedID<TypeNone>; + +// Helper for Variant interface. +struct IVariant +{ + virtual ~IVariant() = default; + virtual IVariant *clone(ObjectPoolBase *pool) = 0; + ID self = 0; +}; + +#define SPIRV_CROSS_DECLARE_CLONE(T) \ + IVariant *clone(ObjectPoolBase *pool) override \ + { \ + return static_cast<ObjectPool<T> *>(pool)->allocate(*this); \ + } + struct SPIRUndef : IVariant { enum @@ -309,15 +427,32 @@ struct SPIRUndef : IVariant type = TypeUndef }; - explicit SPIRUndef(uint32_t basetype_) + explicit SPIRUndef(TypeID basetype_) : basetype(basetype_) { } - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRUndef) }; +struct SPIRString : IVariant +{ + enum + { + type = TypeString + }; + + explicit SPIRString(std::string str_) + : str(std::move(str_)) + { + } + + std::string str; + + SPIRV_CROSS_DECLARE_CLONE(SPIRString) +}; + // This type is only used by backends which need to access the combined image and sampler IDs separately after // the OpSampledImage opcode. struct SPIRCombinedImageSampler : IVariant @@ -326,15 +461,15 @@ struct SPIRCombinedImageSampler : IVariant { type = TypeCombinedImageSampler }; - SPIRCombinedImageSampler(uint32_t type_, uint32_t image_, uint32_t sampler_) + SPIRCombinedImageSampler(TypeID type_, VariableID image_, VariableID sampler_) : combined_type(type_) , image(image_) , sampler(sampler_) { } - uint32_t combined_type; - uint32_t image; - uint32_t sampler; + TypeID combined_type; + VariableID image; + VariableID sampler; SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler) }; @@ -346,16 +481,18 @@ struct SPIRConstantOp : IVariant type = TypeConstantOp }; - SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length) + SPIRConstantOp(TypeID result_type, spv::Op op, const uint32_t *args, uint32_t length) : opcode(op) - , arguments(args, args + length) , basetype(result_type) { + arguments.reserve(length); + for (uint32_t i = 0; i < length; i++) + arguments.push_back(args[i]); } spv::Op opcode; SmallVector<uint32_t> arguments; - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp) }; @@ -418,11 +555,11 @@ struct SPIRType : IVariant spv::StorageClass storage = spv::StorageClassGeneric; - SmallVector<uint32_t> member_types; + SmallVector<TypeID> member_types; struct ImageType { - uint32_t type; + TypeID type; spv::Dim dim; bool depth; bool arrayed; @@ -435,11 +572,11 @@ struct SPIRType : IVariant // Structs can be declared multiple times if they are used as part of interface blocks. // We want to detect this so that we only emit the struct definition once. // Since we cannot rely on OpName to be equal, we need to figure out aliases. - uint32_t type_alias = 0; + TypeID type_alias = 0; // Denotes the type which this type is based on. // Allows the backend to traverse how a complex type is built up during access chains. - uint32_t parent_type = 0; + TypeID parent_type = 0; // Used in backends to avoid emitting members with conflicting names. std::unordered_set<std::string> member_name_cache; @@ -458,6 +595,7 @@ struct SPIRExtension : IVariant { Unsupported, GLSL, + SPV_debug_info, SPV_AMD_shader_ballot, SPV_AMD_shader_explicit_vertex_parameter, SPV_AMD_shader_trinary_minmax, @@ -477,7 +615,7 @@ struct SPIRExtension : IVariant // so in order to avoid conflicts, we can't stick them in the ids array. struct SPIREntryPoint { - SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name) + SPIREntryPoint(FunctionID self_, spv::ExecutionModel execution_model, const std::string &entry_name) : self(self_) , name(entry_name) , orig_name(entry_name) @@ -486,10 +624,10 @@ struct SPIREntryPoint } SPIREntryPoint() = default; - uint32_t self = 0; + FunctionID self = 0; std::string name; std::string orig_name; - SmallVector<uint32_t> interface_variables; + SmallVector<VariableID> interface_variables; Bitset flags; struct @@ -510,7 +648,7 @@ struct SPIRExpression : IVariant }; // Only created by the backend target to avoid creating tons of temporaries. - SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_) + SPIRExpression(std::string expr, TypeID expression_type_, bool immutable_) : expression(move(expr)) , expression_type(expression_type_) , immutable(immutable_) @@ -520,14 +658,14 @@ struct SPIRExpression : IVariant // If non-zero, prepend expression with to_expression(base_expression). // Used in amortizing multiple calls to to_expression() // where in certain cases that would quickly force a temporary when not needed. - uint32_t base_expression = 0; + ID base_expression = 0; std::string expression; - uint32_t expression_type = 0; + TypeID expression_type = 0; // If this expression is a forwarded load, // allow us to reference the original variable. - uint32_t loaded_from = 0; + ID loaded_from = 0; // If this expression will never change, we can avoid lots of temporaries // in high level source. @@ -543,11 +681,11 @@ struct SPIRExpression : IVariant bool access_chain = false; // A list of expressions which this expression depends on. - SmallVector<uint32_t> expression_dependencies; + SmallVector<ID> expression_dependencies; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. - SmallVector<uint32_t> implied_read_expressions; + SmallVector<ID> implied_read_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) }; @@ -559,12 +697,12 @@ struct SPIRFunctionPrototype : IVariant type = TypeFunctionPrototype }; - explicit SPIRFunctionPrototype(uint32_t return_type_) + explicit SPIRFunctionPrototype(TypeID return_type_) : return_type(return_type_) { } - uint32_t return_type; + TypeID return_type; SmallVector<uint32_t> parameter_types; SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype) @@ -639,23 +777,23 @@ struct SPIRBlock : IVariant Terminator terminator = Unknown; Merge merge = MergeNone; Hints hint = HintNone; - uint32_t next_block = 0; - uint32_t merge_block = 0; - uint32_t continue_block = 0; + BlockID next_block = 0; + BlockID merge_block = 0; + BlockID continue_block = 0; - uint32_t return_value = 0; // If 0, return nothing (void). - uint32_t condition = 0; - uint32_t true_block = 0; - uint32_t false_block = 0; - uint32_t default_block = 0; + ID return_value = 0; // If 0, return nothing (void). + ID condition = 0; + BlockID true_block = 0; + BlockID false_block = 0; + BlockID default_block = 0; SmallVector<Instruction> ops; struct Phi { - uint32_t local_variable; // flush local variable ... - uint32_t parent; // If we're in from_block and want to branch into this block ... - uint32_t function_variable; // to this function-global "phi" variable first. + ID local_variable; // flush local variable ... + BlockID parent; // If we're in from_block and want to branch into this block ... + VariableID function_variable; // to this function-global "phi" variable first. }; // Before entering this block flush out local variables to magical "phi" variables. @@ -663,16 +801,16 @@ struct SPIRBlock : IVariant // Declare these temporaries before beginning the block. // Used for handling complex continue blocks which have side effects. - SmallVector<std::pair<uint32_t, uint32_t>> declare_temporary; + SmallVector<std::pair<TypeID, ID>> declare_temporary; // Declare these temporaries, but only conditionally if this block turns out to be // a complex loop header. - SmallVector<std::pair<uint32_t, uint32_t>> potential_declare_temporary; + SmallVector<std::pair<TypeID, ID>> potential_declare_temporary; struct Case { uint32_t value; - uint32_t block; + BlockID block; }; SmallVector<Case> cases; @@ -686,23 +824,27 @@ struct SPIRBlock : IVariant // Do we need a ladder variable to defer breaking out of a loop construct after a switch block? bool need_ladder_break = false; + // If marked, we have explicitly handled Phi from this block, so skip any flushes related to that on a branch. + // Used to handle an edge case with switch and case-label fallthrough where fall-through writes to Phi. + BlockID ignore_phi_from_block = 0; + // The dominating block which this block might be within. // Used in continue; blocks to determine if we really need to write continue. - uint32_t loop_dominator = 0; + BlockID loop_dominator = 0; // All access to these variables are dominated by this block, // so before branching anywhere we need to make sure that we declare these variables. - SmallVector<uint32_t> dominated_variables; + SmallVector<VariableID> dominated_variables; // These are variables which should be declared in a for loop header, if we // fail to use a classic for-loop, // we remove these variables, and fall back to regular variables outside the loop. - SmallVector<uint32_t> loop_variables; + SmallVector<VariableID> loop_variables; // Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or // sub-group-like operations. // Make sure that we only use these expressions in the original block. - SmallVector<uint32_t> invalidate_expressions; + SmallVector<ID> invalidate_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRBlock) }; @@ -714,7 +856,7 @@ struct SPIRFunction : IVariant type = TypeFunction }; - SPIRFunction(uint32_t return_type_, uint32_t function_type_) + SPIRFunction(TypeID return_type_, TypeID function_type_) : return_type(return_type_) , function_type(function_type_) { @@ -722,8 +864,8 @@ struct SPIRFunction : IVariant struct Parameter { - uint32_t type; - uint32_t id; + TypeID type; + ID id; uint32_t read_count; uint32_t write_count; @@ -745,33 +887,40 @@ struct SPIRFunction : IVariant // or a global ID. struct CombinedImageSamplerParameter { - uint32_t id; - uint32_t image_id; - uint32_t sampler_id; + VariableID id; + VariableID image_id; + VariableID sampler_id; bool global_image; bool global_sampler; bool depth; }; - uint32_t return_type; - uint32_t function_type; + TypeID return_type; + TypeID function_type; SmallVector<Parameter> arguments; // Can be used by backends to add magic arguments. // Currently used by combined image/sampler implementation. SmallVector<Parameter> shadow_arguments; - SmallVector<uint32_t> local_variables; - uint32_t entry_block = 0; - SmallVector<uint32_t> blocks; + SmallVector<VariableID> local_variables; + BlockID entry_block = 0; + SmallVector<BlockID> blocks; SmallVector<CombinedImageSamplerParameter> combined_parameters; - void add_local_variable(uint32_t id) + struct EntryLine + { + uint32_t file_id = 0; + uint32_t line_literal = 0; + }; + EntryLine entry_line; + + void add_local_variable(VariableID id) { local_variables.push_back(id); } - void add_parameter(uint32_t parameter_type, uint32_t id, bool alias_global_variable = false) + void add_parameter(TypeID parameter_type, ID id, bool alias_global_variable = false) { // Arguments are read-only until proven otherwise. arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable }); @@ -792,7 +941,7 @@ struct SPIRFunction : IVariant // On function entry, make sure to copy a constant array into thread addr space to work around // the case where we are passing a constant array by value to a function on backends which do not // consider arrays value types. - SmallVector<uint32_t> constant_arrays_needed_on_stack; + SmallVector<ID> constant_arrays_needed_on_stack; bool active = false; bool flush_undeclared = true; @@ -808,7 +957,7 @@ struct SPIRAccessChain : IVariant type = TypeAccessChain }; - SPIRAccessChain(uint32_t basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, + SPIRAccessChain(TypeID basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, int32_t static_index_) : basetype(basetype_) , storage(storage_) @@ -823,20 +972,20 @@ struct SPIRAccessChain : IVariant // which has no usable buffer type ala GLSL SSBOs. // StructuredBuffer is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses. - uint32_t basetype; + TypeID basetype; spv::StorageClass storage; std::string base; std::string dynamic_index; int32_t static_index; - uint32_t loaded_from = 0; + VariableID loaded_from = 0; uint32_t matrix_stride = 0; bool row_major_matrix = false; bool immutable = false; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. - SmallVector<uint32_t> implied_read_expressions; + SmallVector<ID> implied_read_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain) }; @@ -849,7 +998,7 @@ struct SPIRVariable : IVariant }; SPIRVariable() = default; - SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0, uint32_t basevariable_ = 0) + SPIRVariable(TypeID basetype_, spv::StorageClass storage_, ID initializer_ = 0, VariableID basevariable_ = 0) : basetype(basetype_) , storage(storage_) , initializer(initializer_) @@ -857,11 +1006,11 @@ struct SPIRVariable : IVariant { } - uint32_t basetype = 0; + TypeID basetype = 0; spv::StorageClass storage = spv::StorageClassGeneric; uint32_t decoration = 0; - uint32_t initializer = 0; - uint32_t basevariable = 0; + ID initializer = 0; + VariableID basevariable = 0; SmallVector<uint32_t> dereference_chain; bool compat_builtin = false; @@ -871,10 +1020,10 @@ struct SPIRVariable : IVariant // When we read the variable as an expression, just forward // shadowed_id as the expression. bool statically_assigned = false; - uint32_t static_expression = 0; + ID static_expression = 0; // Temporaries which can remain forwarded as long as this variable is not modified. - SmallVector<uint32_t> dependees; + SmallVector<ID> dependees; bool forwardable = true; bool deferred_declaration = false; @@ -887,7 +1036,7 @@ struct SPIRVariable : IVariant uint32_t remapped_components = 0; // The block which dominates all access to this variable. - uint32_t dominator = 0; + BlockID dominator = 0; // If true, this variable is a loop variable, when accessing the variable // outside a loop, // we should statically forward it. @@ -907,7 +1056,8 @@ struct SPIRConstant : IVariant type = TypeConstant }; - union Constant { + union Constant + { uint32_t u32; int32_t i32; float f32; @@ -921,15 +1071,12 @@ struct SPIRConstant : IVariant { Constant r[4]; // If != 0, this element is a specialization constant, and we should keep track of it as such. - uint32_t id[4]; + ID id[4]; uint32_t vecsize = 1; - // Workaround for MSVC 2013, initializing an array breaks. ConstantVector() { memset(r, 0, sizeof(r)); - for (unsigned i = 0; i < 4; i++) - id[i] = 0; } }; @@ -937,15 +1084,8 @@ struct SPIRConstant : IVariant { ConstantVector c[4]; // If != 0, this column is a specialization constant, and we should keep track of it as such. - uint32_t id[4]; + ID id[4]; uint32_t columns = 1; - - // Workaround for MSVC 2013, initializing an array breaks. - ConstantMatrix() - { - for (unsigned i = 0; i < 4; i++) - id[i] = 0; - } }; static inline float f16_to_f32(uint16_t u16_value) @@ -955,7 +1095,8 @@ struct SPIRConstant : IVariant int e = (u16_value >> 10) & 0x1f; int m = (u16_value >> 0) & 0x3ff; - union { + union + { float f32; uint32_t u32; } u; @@ -1109,16 +1250,18 @@ struct SPIRConstant : IVariant SPIRConstant() = default; - SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) + SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) { - subconstants.insert(std::end(subconstants), elements, elements + num_elements); + subconstants.reserve(num_elements); + for (uint32_t i = 0; i < num_elements; i++) + subconstants.push_back(elements[i]); specialization = specialized; } // Construct scalar (32-bit). - SPIRConstant(uint32_t constant_type_, uint32_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint32_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1128,7 +1271,7 @@ struct SPIRConstant : IVariant } // Construct scalar (64-bit). - SPIRConstant(uint32_t constant_type_, uint64_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint64_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1138,7 +1281,7 @@ struct SPIRConstant : IVariant } // Construct vectors and matrices. - SPIRConstant(uint32_t constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, + SPIRConstant(TypeID constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) @@ -1170,7 +1313,7 @@ struct SPIRConstant : IVariant } } - uint32_t constant_type = 0; + TypeID constant_type = 0; ConstantMatrix m; // If this constant is a specialization constant (i.e. created with OpSpecConstant*). @@ -1182,7 +1325,7 @@ struct SPIRConstant : IVariant bool is_used_as_lut = false; // For composites which are constant arrays, etc. - SmallVector<uint32_t> subconstants; + SmallVector<ConstantID> subconstants; // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant, // and uses them to initialize the constant. This allows the user @@ -1317,9 +1460,9 @@ public: return type; } - uint32_t get_id() const + ID get_id() const { - return holder ? holder->self : 0; + return holder ? holder->self : ID(0); } bool empty() const @@ -1368,12 +1511,57 @@ T &variant_set(Variant &var, P &&... args) struct AccessChainMeta { - uint32_t storage_packed_type = 0; + uint32_t storage_physical_type = 0; bool need_transpose = false; bool storage_is_packed = false; bool storage_is_invariant = false; }; +enum ExtendedDecorations +{ + // Marks if a buffer block is re-packed, i.e. member declaration might be subject to PhysicalTypeID remapping and padding. + SPIRVCrossDecorationBufferBlockRepacked = 0, + + // A type in a buffer block might be declared with a different physical type than the logical type. + // If this is not set, PhysicalTypeID == the SPIR-V type as declared. + SPIRVCrossDecorationPhysicalTypeID, + + // Marks if the physical type is to be declared with tight packing rules, i.e. packed_floatN on MSL and friends. + // If this is set, PhysicalTypeID might also be set. It can be set to same as logical type if all we're doing + // is converting float3 to packed_float3 for example. + // If this is marked on a struct, it means the struct itself must use only Packed types for all its members. + SPIRVCrossDecorationPhysicalTypePacked, + + // The padding in bytes before declaring this struct member. + // If used on a struct type, marks the target size of a struct. + SPIRVCrossDecorationPaddingTarget, + + SPIRVCrossDecorationInterfaceMemberIndex, + SPIRVCrossDecorationInterfaceOrigID, + SPIRVCrossDecorationResourceIndexPrimary, + // Used for decorations like resource indices for samplers when part of combined image samplers. + // A variable might need to hold two resource indices in this case. + SPIRVCrossDecorationResourceIndexSecondary, + // Used for resource indices for multiplanar images when part of combined image samplers. + SPIRVCrossDecorationResourceIndexTertiary, + SPIRVCrossDecorationResourceIndexQuaternary, + + // Marks a buffer block for using explicit offsets (GLSL/HLSL). + SPIRVCrossDecorationExplicitOffset, + + // Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase(). + // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables. + SPIRVCrossDecorationBuiltInDispatchBase, + + // Apply to a variable that is a function parameter; marks it as being a "dynamic" + // combined image-sampler. In MSL, this is used when a function parameter might hold + // either a regular combined image-sampler or one that has an attached sampler + // Y'CbCr conversion. + SPIRVCrossDecorationDynamicImageSampler, + + SPIRVCrossDecorationCount +}; + struct Meta { struct Decoration @@ -1396,13 +1584,17 @@ struct Meta spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax; bool builtin = false; - struct + struct Extended { - uint32_t packed_type = 0; - bool packed = false; - uint32_t ib_member_index = ~(0u); - uint32_t ib_orig_id = 0; - uint32_t argument_buffer_id = ~(0u); + Extended() + { + // MSVC 2013 workaround to init like this. + for (auto &v : values) + v = 0; + } + + Bitset flags; + uint32_t values[SPIRVCrossDecorationCount]; } extended; }; @@ -1510,4 +1702,16 @@ static inline bool opcode_is_sign_invariant(spv::Op opcode) } } // namespace SPIRV_CROSS_NAMESPACE +namespace std +{ +template <SPIRV_CROSS_NAMESPACE::Types type> +struct hash<SPIRV_CROSS_NAMESPACE::TypedID<type>> +{ + size_t operator()(const SPIRV_CROSS_NAMESPACE::TypedID<type> &value) const + { + return std::hash<uint32_t>()(value); + } +}; +} // namespace std + #endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp index 90566c1..25966b3 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp @@ -321,6 +321,8 @@ string CompilerCPP::compile() backend.explicit_struct_type = true; backend.use_initializer_list = true; + fixup_type_alias(); + reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); update_active_builtins(); diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp index 6b66b74..c73ecdf 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp @@ -17,6 +17,7 @@ #include "spirv_cross.hpp" #include "GLSL.std.450.h" #include "spirv_cfg.hpp" +#include "spirv_common.hpp" #include "spirv_parser.hpp" #include <algorithm> #include <cstring> @@ -87,6 +88,10 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) bool Compiler::block_is_pure(const SPIRBlock &block) { + // This is a global side effect of the function. + if (block.terminator == SPIRBlock::Kill) + return false; + for (auto &i : block.ops) { auto ops = stream(i); @@ -155,6 +160,10 @@ bool Compiler::block_is_pure(const SPIRBlock &block) // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. + case OpDemoteToHelperInvocationEXT: + // This is a global side effect of the function. + return false; + default: break; } @@ -176,7 +185,7 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const { // If the alias master has been specially packed, we will have emitted a clean variant as well, // so skip the name aliasing here. - if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return to_name(type.type_alias); } } @@ -264,6 +273,15 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) return var; } +StorageClass Compiler::get_backing_variable_storage(uint32_t ptr) +{ + auto *var = maybe_get_backing_variable(ptr); + if (var) + return var->storage; + else + return expression_type(ptr).storage; +} + void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) { auto &e = get<SPIRExpression>(expr); @@ -568,7 +586,7 @@ ShaderResources Compiler::get_shader_resources() const return get_shader_resources(nullptr); } -ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> &active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const { return get_shader_resources(&active_variables); } @@ -708,6 +726,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t case OpAtomicAnd: case OpAtomicOr: case OpAtomicXor: + case OpArrayLength: // Invalid SPIR-V. if (length < 3) return false; @@ -724,16 +743,16 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t return true; } -unordered_set<uint32_t> Compiler::get_active_interface_variables() const +unordered_set<VariableID> Compiler::get_active_interface_variables() const { // Traverse the call graph and find all interface variables which are in use. - unordered_set<uint32_t> variables; + unordered_set<VariableID> variables; InterfaceVariableAccessHandler handler(*this, variables); traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); // Make sure we preserve output variables which are only initialized, but never accessed by any code. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage == StorageClassOutput && var.initializer != ID(0)) variables.insert(var.self); }); @@ -744,16 +763,18 @@ unordered_set<uint32_t> Compiler::get_active_interface_variables() const return variables; } -void Compiler::set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables) +void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables) { active_interface_variables = move(active_variables); check_active_interface_variables = true; } -ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const { ShaderResources res; + bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant(); + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { auto &type = this->get<SPIRType>(var.basetype); @@ -771,7 +792,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac if (has_decoration(type.self, DecorationBlock)) { res.stage_inputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } else res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -787,7 +808,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac if (has_decoration(type.self, DecorationBlock)) { res.stage_outputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } else res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -796,19 +817,19 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock)) { res.uniform_buffers.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } // Old way to declare SSBOs. else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock)) { res.storage_buffers.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); } // Modern way to declare SSBOs. else if (type.storage == StorageClassStorageBuffer) { res.storage_buffers.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); } // Push constant blocks else if (type.storage == StorageClassPushConstant) @@ -872,65 +893,6 @@ bool Compiler::type_is_block_like(const SPIRType &type) const return false; } -void Compiler::fixup_type_alias() -{ - // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. - // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type - // alias if the slave type is a block type. - ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) { - if (type.type_alias && type_is_block_like(type)) - { - // Become the master. - ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) { - if (other_id == type.self) - return; - - if (other_type.type_alias == type.type_alias) - other_type.type_alias = type.self; - }); - - this->get<SPIRType>(type.type_alias).type_alias = self; - type.type_alias = 0; - } - }); - - ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) { - if (type.type_alias && type_is_block_like(type)) - { - // This is not allowed, drop the type_alias. - type.type_alias = 0; - } - }); - - // Reorder declaration of types so that the master of the type alias is always emitted first. - // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which - // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. - auto &type_ids = ir.ids_for_type[TypeType]; - for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) - { - auto &type = get<SPIRType>(*alias_itr); - if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) - { - // We will skip declaring this type, so make sure the type_alias type comes before. - auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias); - assert(master_itr != end(type_ids)); - - if (alias_itr < master_itr) - { - // Must also swap the type order for the constant-type joined array. - auto &joined_types = ir.ids_for_constant_or_type; - auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); - auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); - assert(alt_alias_itr != end(joined_types)); - assert(alt_master_itr != end(joined_types)); - - swap(*alias_itr, *master_itr); - swap(*alt_alias_itr, *alt_master_itr); - } - } - } -} - void Compiler::parse_fixup() { // Figure out specialization constants for work group sizes. @@ -964,8 +926,6 @@ void Compiler::parse_fixup() aliased_variables.push_back(var.self); } } - - fixup_type_alias(); } void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary, @@ -1026,17 +986,17 @@ void Compiler::update_name_cache(unordered_set<string> &cache, string &name) update_name_cache(cache, cache, name); } -void Compiler::set_name(uint32_t id, const std::string &name) +void Compiler::set_name(ID id, const std::string &name) { ir.set_name(id, name); } -const SPIRType &Compiler::get_type(uint32_t id) const +const SPIRType &Compiler::get_type(TypeID id) const { return get<SPIRType>(id); } -const SPIRType &Compiler::get_type_from_variable(uint32_t id) const +const SPIRType &Compiler::get_type_from_variable(VariableID id) const { return get<SPIRType>(get<SPIRVariable>(id).basetype); } @@ -1107,23 +1067,23 @@ bool Compiler::is_sampled_image_type(const SPIRType &type) type.image.dim != DimBuffer; } -void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, +void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument) { ir.set_member_decoration_string(id, index, decoration, argument); } -void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { ir.set_member_decoration(id, index, decoration, argument); } -void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name) +void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name) { ir.set_member_name(id, index, name); } -const std::string &Compiler::get_member_name(uint32_t id, uint32_t index) const +const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const { return ir.get_member_name(id, index); } @@ -1139,7 +1099,7 @@ void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const ir.meta[type_id].members[index].qualified_alias = name; } -const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t index) const +const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const { auto *m = ir.find_meta(type_id); if (m && index < m->members.size()) @@ -1148,32 +1108,32 @@ const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t ind return ir.get_empty_string(); } -uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration(id, index, decoration); } -const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const { return ir.get_member_decoration_bitset(id, index); } -bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.has_member_decoration(id, index, decoration); } -void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { ir.unset_member_decoration(id, index, decoration); } -void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument) +void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument) { ir.set_decoration_string(id, decoration, argument); } -void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument) { ir.set_decoration(id, decoration, argument); } @@ -1181,28 +1141,8 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value) { auto &dec = ir.meta[id].decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = true; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = value; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = value; - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = value; - break; - - case SPIRVCrossDecorationArgumentBufferID: - dec.extended.argument_buffer_id = value; - break; - } + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; } void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, @@ -1210,28 +1150,23 @@ void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, Ext { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; +} +static uint32_t get_default_extended_decoration(ExtendedDecorations decoration) +{ switch (decoration) { - case SPIRVCrossDecorationPacked: - dec.extended.packed = true; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = value; - break; - + case SPIRVCrossDecorationResourceIndexPrimary: + case SPIRVCrossDecorationResourceIndexSecondary: + case SPIRVCrossDecorationResourceIndexTertiary: + case SPIRVCrossDecorationResourceIndexQuaternary: case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = value; - break; + return ~(0u); - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = value; - break; - - case SPIRVCrossDecorationArgumentBufferID: - dec.extended.argument_buffer_id = value; - break; + default: + return 0; } } @@ -1242,25 +1177,11 @@ uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations deco return 0; auto &dec = m->decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return uint32_t(dec.extended.packed); - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type; + if (!dec.extended.flags.get(decoration)) + return get_default_extended_decoration(decoration); - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index; - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id; - - case SPIRVCrossDecorationArgumentBufferID: - return dec.extended.argument_buffer_id; - } - - return 0; + return dec.extended.values[decoration]; } uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const @@ -1273,25 +1194,9 @@ uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, return 0; auto &dec = m->members[index]; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return uint32_t(dec.extended.packed); - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index; - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id; - - case SPIRVCrossDecorationArgumentBufferID: - return dec.extended.argument_buffer_id; - } - - return 0; + if (!dec.extended.flags.get(decoration)) + return get_default_extended_decoration(decoration); + return dec.extended.values[decoration]; } bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const @@ -1301,25 +1206,7 @@ bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decorati return false; auto &dec = m->decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return dec.extended.packed; - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type != 0; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index != uint32_t(-1); - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id != 0; - - case SPIRVCrossDecorationArgumentBufferID: - return dec.extended.argument_buffer_id != 0; - } - - return false; + return dec.extended.flags.get(decoration); } bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const @@ -1332,99 +1219,40 @@ bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, Ext return false; auto &dec = m->members[index]; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return dec.extended.packed; - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type != 0; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index != uint32_t(-1); - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id != 0; - - case SPIRVCrossDecorationArgumentBufferID: - return dec.extended.argument_buffer_id != uint32_t(-1); - } - - return false; + return dec.extended.flags.get(decoration); } void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration) { auto &dec = ir.meta[id].decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = false; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = 0; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = ~(0u); - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = 0; - break; - - case SPIRVCrossDecorationArgumentBufferID: - dec.extended.argument_buffer_id = 0; - break; - } + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; } void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; - - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = false; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = 0; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = ~(0u); - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = 0; - break; - - case SPIRVCrossDecorationArgumentBufferID: - dec.extended.argument_buffer_id = 0; - break; - } + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; } -StorageClass Compiler::get_storage_class(uint32_t id) const +StorageClass Compiler::get_storage_class(VariableID id) const { return get<SPIRVariable>(id).storage; } -const std::string &Compiler::get_name(uint32_t id) const +const std::string &Compiler::get_name(ID id) const { return ir.get_name(id); } -const std::string Compiler::get_fallback_name(uint32_t id) const +const std::string Compiler::get_fallback_name(ID id) const { return join("_", id); } -const std::string Compiler::get_block_fallback_name(uint32_t id) const +const std::string Compiler::get_block_fallback_name(VariableID id) const { auto &var = get<SPIRVariable>(id); if (get_name(id).empty()) @@ -1433,37 +1261,37 @@ const std::string Compiler::get_block_fallback_name(uint32_t id) const return get_name(id); } -const Bitset &Compiler::get_decoration_bitset(uint32_t id) const +const Bitset &Compiler::get_decoration_bitset(ID id) const { return ir.get_decoration_bitset(id); } -bool Compiler::has_decoration(uint32_t id, Decoration decoration) const +bool Compiler::has_decoration(ID id, Decoration decoration) const { return ir.has_decoration(id, decoration); } -const string &Compiler::get_decoration_string(uint32_t id, Decoration decoration) const +const string &Compiler::get_decoration_string(ID id, Decoration decoration) const { return ir.get_decoration_string(id, decoration); } -const string &Compiler::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration_string(id, index, decoration); } -uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const +uint32_t Compiler::get_decoration(ID id, Decoration decoration) const { return ir.get_decoration(id, decoration); } -void Compiler::unset_decoration(uint32_t id, Decoration decoration) +void Compiler::unset_decoration(ID id, Decoration decoration) { ir.unset_decoration(id, decoration); } -bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const +bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const { auto *m = ir.find_meta(id); if (!m) @@ -1587,38 +1415,6 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method return false; } -bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to) -{ - auto *start = &from; - - if (start->self == to.self) - return true; - - // Break cycles. - if (is_continue(start->self)) - return false; - - // If our select block doesn't merge, we must break or continue in these blocks, - // so if continues occur branchless within these blocks, consider them branchless as well. - // This is typically used for loop control. - if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone && - (block_is_outside_flow_control_from_block(get<SPIRBlock>(start->true_block), to) || - block_is_outside_flow_control_from_block(get<SPIRBlock>(start->false_block), to))) - { - return true; - } - else if (start->merge_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->merge_block), to)) - { - return true; - } - else if (start->next_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->next_block), to)) - { - return true; - } - else - return false; -} - bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const { if (!execution_is_branchless(from, to)) @@ -1658,6 +1454,11 @@ bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &t } } +bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const +{ + return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self; +} + SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const { // The block was deemed too complex during code emit, pick conservative fallback paths. @@ -1669,6 +1470,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc if (block.merge == SPIRBlock::MergeLoop) return SPIRBlock::WhileLoop; + if (block.loop_dominator == BlockID(SPIRBlock::NoDominator)) + { + // Continue block is never reached from CFG. + return SPIRBlock::ComplexLoop; + } + auto &dominator = get<SPIRBlock>(block.loop_dominator); if (execution_is_noop(block, dominator)) @@ -1681,6 +1488,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc const auto *true_block = maybe_get<SPIRBlock>(block.true_block); const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block); + // If we need to flush Phi in this block, we cannot have a DoWhile loop. + bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block); + bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block); + if (flush_phi_to_false || flush_phi_to_true) + return SPIRBlock::ComplexLoop; + bool positive_do_while = block.true_block == dominator.self && (block.false_block == dominator.merge_block || (false_block && merge_block && execution_is_noop(*false_block, *merge_block))); @@ -1702,6 +1515,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const { handler.set_current_block(block); + handler.rearm_current_block(block); // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing @@ -1725,6 +1539,8 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand return false; if (!handler.end_function_scope(ops, i.length)) return false; + + handler.rearm_current_block(block); } } } @@ -1921,7 +1737,7 @@ bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint return true; } -SmallVector<BufferRange> Compiler::get_active_buffer_ranges(uint32_t id) const +SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const { SmallVector<BufferRange> ranges; BufferAccessHandler handler(*this, ranges, id); @@ -2014,19 +1830,19 @@ uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationCo { auto &c = get<SPIRConstant>(execution.workgroup_size.constant); - if (c.m.c[0].id[0] != 0) + if (c.m.c[0].id[0] != ID(0)) { x.id = c.m.c[0].id[0]; x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); } - if (c.m.c[0].id[1] != 0) + if (c.m.c[0].id[1] != ID(0)) { y.id = c.m.c[0].id[1]; y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); } - if (c.m.c[0].id[2] != 0) + if (c.m.c[0].id[2] != ID(0)) { z.id = c.m.c[0].id[2]; z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); @@ -2081,36 +1897,36 @@ bool Compiler::is_tessellation_shader() const return is_tessellation_shader(get_execution_model()); } -void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable) +void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable) { get<SPIRVariable>(id).remapped_variable = remap_enable; } -bool Compiler::get_remapped_variable_state(uint32_t id) const +bool Compiler::get_remapped_variable_state(VariableID id) const { return get<SPIRVariable>(id).remapped_variable; } -void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components) +void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components) { get<SPIRVariable>(id).remapped_components = components; } -uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const +uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const { return get<SPIRVariable>(id).remapped_components; } void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } @@ -2251,7 +2067,7 @@ bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const return true; auto &execution = get_entry_point(); - return find(begin(execution.interface_variables), end(execution.interface_variables), id) != + return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) != end(execution.interface_variables); } @@ -2331,8 +2147,8 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a { for (auto ¶m : params) { - uint32_t image_id = param.global_image ? param.image_id : args[param.image_id]; - uint32_t sampler_id = param.global_sampler ? param.sampler_id : args[param.sampler_id]; + VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]); + VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]); auto *i = compiler.maybe_get_backing_variable(image_id); auto *s = compiler.maybe_get_backing_variable(sampler_id); @@ -2341,15 +2157,17 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a if (s) sampler_id = s->self; - register_combined_image_sampler(caller, image_id, sampler_id, param.depth); + register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth); } } return true; } -void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, uint32_t image_id, - uint32_t sampler_id, bool depth) +void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, + VariableID combined_module_id, + VariableID image_id, VariableID sampler_id, + bool depth) { // We now have a texture ID and a sampler ID which will either be found as a global // or a parameter in our own function. If both are global, they will not need a parameter, @@ -2409,12 +2227,15 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR // Build new variable. compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0); - // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). - auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; - auto &old_flags = compiler.ir.meta[sampler_id].decoration.decoration_flags; - new_flags.reset(); - if (old_flags.get(DecorationRelaxedPrecision)) - new_flags.set(DecorationRelaxedPrecision); + // Inherit RelaxedPrecision. + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. + bool relaxed_precision = + compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) || + compiler.has_decoration(image_id, DecorationRelaxedPrecision) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); param.id = combined_id; @@ -2621,8 +2442,10 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar if (sampler) sampler_id = sampler->self; + uint32_t combined_id = args[1]; + auto &combined_type = compiler.get<SPIRType>(args[0]); - register_combined_image_sampler(callee, image_id, sampler_id, combined_type.image.depth); + register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth); } } @@ -2630,8 +2453,8 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar // This information is statically known from the current place in the call stack. // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know // which backing variable the image/sample came from. - uint32_t image_id = remap_parameter(args[2]); - uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); + VariableID image_id = remap_parameter(args[2]); + VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), [image_id, sampler_id](const CombinedImageSampler &combined) { @@ -2641,6 +2464,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar if (itr == end(compiler.combined_image_samplers)) { uint32_t sampled_type; + uint32_t combined_module_id; if (is_fetch) { // Have to invent the sampled image type. @@ -2650,10 +2474,12 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar type.self = sampled_type; type.basetype = SPIRType::SampledImage; type.image.depth = false; + combined_module_id = 0; } else { sampled_type = args[0]; + combined_module_id = args[1]; } auto id = compiler.ir.increase_bound_by(2); @@ -2673,12 +2499,14 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0); // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). - auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; - // Fetch inherits precision from the image, not sampler (there is no sampler). - auto &old_flags = compiler.ir.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags; - new_flags.reset(); - if (old_flags.get(DecorationRelaxedPrecision)) - new_flags.set(DecorationRelaxedPrecision); + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. + bool relaxed_precision = + (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) || + (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); // Propagate the array type for the original image as well. auto *var = compiler.maybe_get_backing_variable(image_id); @@ -2695,7 +2523,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar return true; } -uint32_t Compiler::build_dummy_sampler_for_combined_images() +VariableID Compiler::build_dummy_sampler_for_combined_images() { DummySamplerForCombinedImageHandler handler(*this); traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); @@ -2749,12 +2577,12 @@ SmallVector<SpecializationConstant> Compiler::get_specialization_constants() con return spec_consts; } -SPIRConstant &Compiler::get_constant(uint32_t id) +SPIRConstant &Compiler::get_constant(ConstantID id) { return get<SPIRConstant>(id); } -const SPIRConstant &Compiler::get_constant(uint32_t id) const +const SPIRConstant &Compiler::get_constant(ConstantID id) const { return get<SPIRConstant>(id); } @@ -2938,7 +2766,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t ptr = args[0]; + ID ptr = args[0]; auto *var = compiler.maybe_get_backing_variable(ptr); // If we store through an access chain, we have a partial write. @@ -2983,7 +2811,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // The result of an access chain is a fixed expression and is not really considered a temporary. auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true); auto *backing_variable = compiler.maybe_get_backing_variable(ptr); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0); // Other backends might use SPIRAccessChain for this later. compiler.ir.ids[args[1]].set_allow_type_rewrite(); @@ -2996,8 +2824,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t lhs = args[0]; - uint32_t rhs = args[1]; + ID lhs = args[0]; + ID rhs = args[1]; auto *var = compiler.maybe_get_backing_variable(lhs); // If we store through an access chain, we have a partial write. @@ -3096,6 +2924,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 } case OpArrayLength: + case OpLine: + case OpNoLine: // Uses literals, but cannot be a phi variable or temporary, so ignore. break; @@ -3334,11 +3164,34 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA unordered_map<uint32_t, uint32_t> potential_loop_variables; + // Find the loop dominator block for each block. + for (auto &block_id : entry.blocks) + { + auto &block = get<SPIRBlock>(block_id); + + auto itr = ir.continue_block_to_loop_header.find(block_id); + if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id) + { + // Continue block might be unreachable in the CFG, but we still like to know the loop dominator. + // Edge case is when continue block is also the loop header, don't set the dominator in this case. + block.loop_dominator = itr->second; + } + else + { + uint32_t loop_dominator = cfg.find_loop_dominator(block_id); + if (loop_dominator != block_id) + block.loop_dominator = loop_dominator; + else + block.loop_dominator = SPIRBlock::NoDominator; + } + } + // For each variable which is statically accessed. for (auto &var : handler.accessed_variables_to_block) { // Only deal with variables which are considered local variables in this function. - if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables)) + if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) == + end(entry.local_variables)) continue; DominatorBuilder builder(cfg); @@ -3379,7 +3232,35 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA builder.lift_continue_block_dominator(); // Add it to a per-block list of variables. - uint32_t dominating_block = builder.get_dominator(); + BlockID dominating_block = builder.get_dominator(); + + // For variables whose dominating block is inside a loop, there is a risk that these variables + // actually need to be preserved across loop iterations. We can express this by adding + // a "read" access to the loop header. + // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable. + // Should that fail, we look for the outermost loop header and tack on an access there. + // Phi nodes cannot have this problem. + if (dominating_block) + { + auto &variable = get<SPIRVariable>(var.first); + if (!variable.phi_variable) + { + auto *block = &get<SPIRBlock>(dominating_block); + bool preserve = may_read_undefined_variable_in_block(*block, var.first); + if (preserve) + { + // Find the outermost loop scope. + while (block->loop_dominator != BlockID(SPIRBlock::NoDominator)) + block = &get<SPIRBlock>(block->loop_dominator); + + if (block->self != dominating_block) + { + builder.add_block(block->self); + dominating_block = builder.get_dominator(); + } + } + } + } // If all blocks here are dead code, this will be 0, so the variable in question // will be completely eliminated. @@ -3416,10 +3297,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { builder.add_block(block); - // If a temporary is used in more than one block, we might have to lift continue block - // access up to loop header like we did for variables. if (blocks.size() != 1 && is_continue(block)) { + // The risk here is that inner loop can dominate the continue block. + // Any temporary we access in the continue block must be declared before the loop. + // This is moot for complex loops however. auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]); assert(loop_header_block.merge == SPIRBlock::MergeLoop); @@ -3427,14 +3309,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA if (!loop_header_block.complex_continue) builder.add_block(loop_header_block.self); } - else if (blocks.size() != 1 && is_single_block_loop(block)) - { - // Awkward case, because the loop header is also the continue block. - force_temporary = true; - } } uint32_t dominating_block = builder.get_dominator(); + + if (blocks.size() != 1 && is_single_block_loop(dominating_block)) + { + // Awkward case, because the loop header is also the continue block, + // so hoisting to loop header does not help. + force_temporary = true; + } + if (dominating_block) { // If we touch a variable in the dominating block, this is the expected setup. @@ -3485,17 +3370,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { auto &var = get<SPIRVariable>(loop_variable.first); auto dominator = var.dominator; - auto block = loop_variable.second; + BlockID block = loop_variable.second; // The variable was accessed in multiple continue blocks, ignore. - if (block == ~(0u) || block == 0) + if (block == BlockID(~(0u)) || block == BlockID(0)) continue; // Dead code. - if (dominator == 0) + if (dominator == ID(0)) continue; - uint32_t header = 0; + BlockID header = 0; // Find the loop header for this block if we are a continue block. { @@ -3554,10 +3439,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA // merge can occur. Walk the CFG to see if we find anything. seen_blocks.clear(); - cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) { + cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool { // We found a block which accesses the variable outside the loop. if (blocks.find(walk_block) != end(blocks)) static_loop_init = false; + return true; }); if (!static_loop_init) @@ -3572,7 +3458,80 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA } } -Bitset Compiler::get_buffer_block_flags(uint32_t id) const +bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var) +{ + for (auto &op : block.ops) + { + auto *ops = stream(op); + switch (op.op) + { + case OpStore: + case OpCopyMemory: + if (ops[0] == var) + return false; + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + // Access chains are generally used to partially read and write. It's too hard to analyze + // if all constituents are written fully before continuing, so just assume it's preserved. + // This is the same as the parameter preservation analysis. + if (ops[2] == var) + return true; + break; + + case OpSelect: + // Variable pointers. + // We might read before writing. + if (ops[3] == var || ops[4] == var) + return true; + break; + + case OpPhi: + { + // Variable pointers. + // We might read before writing. + if (op.length < 2) + break; + + uint32_t count = op.length - 2; + for (uint32_t i = 0; i < count; i += 2) + if (ops[i + 2] == var) + return true; + break; + } + + case OpCopyObject: + case OpLoad: + if (ops[2] == var) + return true; + break; + + case OpFunctionCall: + { + if (op.length < 3) + break; + + // May read before writing. + uint32_t count = op.length - 3; + for (uint32_t i = 0; i < count; i++) + if (ops[i + 3] == var) + return true; + break; + } + + default: + break; + } + } + + // Not accessed somehow, at least not in a usual fashion. + // It's likely accessed in a branch, so assume we must preserve. + return true; +} + +Bitset Compiler::get_buffer_block_flags(VariableID id) const { return ir.get_buffer_block_flags(get<SPIRVariable>(id)); } @@ -3848,6 +3807,20 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin return true; } +const CFG &Compiler::get_cfg_for_current_function() const +{ + assert(current_function); + return get_cfg_for_function(current_function->self); +} + +const CFG &Compiler::get_cfg_for_function(uint32_t id) const +{ + auto cfg_itr = function_cfgs.find(id); + assert(cfg_itr != end(function_cfgs)); + assert(cfg_itr->second); + return *cfg_itr->second; +} + void Compiler::build_function_control_flow_graphs_and_analyze() { CFGBuilder handler(*this); @@ -3997,13 +3970,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ return true; } -bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const +bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const { auto *m = ir.find_meta(id); return m && m->hlsl_is_magic_counter_buffer; } -bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const +bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const { auto *m = ir.find_meta(id); @@ -4068,20 +4041,69 @@ const SmallVector<std::string> &Compiler::get_declared_extensions() const return ir.declared_extensions; } -std::string Compiler::get_remapped_declared_block_name(uint32_t id) const +std::string Compiler::get_remapped_declared_block_name(VariableID id) const +{ + return get_remapped_declared_block_name(id, false); +} + +std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const { auto itr = declared_block_names.find(id); if (itr != end(declared_block_names)) + { return itr->second; + } else { auto &var = get<SPIRVariable>(id); - auto &type = get<SPIRType>(var.basetype); - auto *type_meta = ir.find_meta(type.self); - auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr; - return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name; + if (fallback_prefer_instance_name) + { + return to_name(var.self); + } + else + { + auto &type = get<SPIRType>(var.basetype); + auto *type_meta = ir.find_meta(type.self); + auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr; + return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name; + } + } +} + +bool Compiler::reflection_ssbo_instance_name_is_significant() const +{ + if (ir.source.known) + { + // UAVs from HLSL source tend to be declared in a way where the type is reused + // but the instance name is significant, and that's the name we should report. + // For GLSL, SSBOs each have their own block type as that's how GLSL is written. + return ir.source.hlsl; } + + unordered_set<uint32_t> ssbo_type_ids; + bool aliased_ssbo_types = false; + + // If we don't have any OpSource information, we need to perform some shaky heuristics. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + if (!type.pointer || var.storage == StorageClassFunction) + return; + + bool ssbo = var.storage == StorageClassStorageBuffer || + (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock)); + + if (ssbo) + { + if (ssbo_type_ids.count(type.self)) + aliased_ssbo_types = true; + else + ssbo_type_ids.insert(type.self); + } + }); + + // If the block name is aliased, assume we have HLSL-style UAV declarations. + return aliased_ssbo_types; } bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, @@ -4110,6 +4132,8 @@ bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &resul case OpCommitWritePipe: case OpGroupCommitReadPipe: case OpGroupCommitWritePipe: + case OpLine: + case OpNoLine: return false; default: @@ -4242,6 +4266,316 @@ void Compiler::analyze_non_block_pointer_types() sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types)); } +bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t) +{ + if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT) + { + if (interlock_function_id != 0 && interlock_function_id != call_stack.back()) + { + // Most complex case, we have no sensible way of dealing with this + // other than taking the 100% conservative approach, exit early. + split_function_case = true; + return false; + } + else + { + interlock_function_id = call_stack.back(); + // If this call is performed inside control flow we have a problem. + auto &cfg = compiler.get_cfg_for_function(interlock_function_id); + + uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block; + bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id); + if (!outside_control_flow) + control_flow_interlock = true; + } + } + return true; +} + +void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block) +{ + current_block_id = block.self; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t) +{ + call_stack.pop_back(); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + if (args[2] == interlock_function_id) + call_stack_is_interlocked = true; + + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) +{ + if (call_stack.back() == interlock_function_id) + call_stack_is_interlocked = false; + + call_stack.pop_back(); + return true; +} + +void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) +{ + if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) || + split_function_case) + { + compiler.interlocked_resources.insert(id); + } +} + +bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Only care about critical section analysis if we have simple case. + if (use_critical_section) + { + if (opcode == OpBeginInvocationInterlockEXT) + { + in_crit_sec = true; + return true; + } + + if (opcode == OpEndInvocationInterlockEXT) + { + // End critical section--nothing more to do. + return false; + } + } + + // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniformConstant: + { + uint32_t result_type = args[0]; + uint32_t id = args[1]; + compiler.set<SPIRExpression>(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get<SPIRType>(result_type); + if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassStorageBuffer) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set<SPIRExpression>(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + } + break; + } + + case OpImageTexelPointer: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + auto &e = compiler.set<SPIRExpression>(id, "", result_type, true); + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + e.loaded_from = var->self; + break; + } + + case OpStore: + case OpImageWrite: + case OpAtomicStore: + { + if (length < 1) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t dst = args[0]; + uint32_t src = args[1]; + auto *dst_var = compiler.maybe_get_backing_variable(dst); + auto *src_var = compiler.maybe_get_backing_variable(src); + + if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) + access_potential_resource(dst_var->self); + + if (src_var) + { + if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) + break; + + if (src_var->storage == StorageClassUniform && + !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock)) + { + break; + } + + access_potential_resource(src_var->self); + } + + break; + } + + case OpImageRead: + case OpAtomicLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassUniformConstant: + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + default: + break; + } + + return true; +} + +void Compiler::analyze_interlocked_resource_usage() +{ + if (get_execution_model() == ExecutionModelFragment && + (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))) + { + InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler); + + InterlockedResourceAccessHandler handler(*this, ir.default_entry_point); + handler.interlock_function_id = prepass_handler.interlock_function_id; + handler.split_function_case = prepass_handler.split_function_case; + handler.control_flow_interlock = prepass_handler.control_flow_interlock; + handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; + + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + + // For GLSL. If we hit any of these cases, we have to fall back to conservative approach. + interlocked_is_complex = + !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point; + } +} + bool Compiler::type_is_array_of_pointers(const SPIRType &type) const { if (!type.pointer) @@ -4250,3 +4584,12 @@ bool Compiler::type_is_array_of_pointers(const SPIRType &type) const // If parent type has same pointer depth, we must have an array of pointers. return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth; } + +bool Compiler::flush_phi_required(BlockID from, BlockID to) const +{ + auto &child = get<SPIRBlock>(to); + for (auto &phi : child.phi_variables) + if (phi.parent == from) + return true; + return false; +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp index 4129e81..7385a6c 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp @@ -27,18 +27,18 @@ struct Resource { // Resources are identified with their SPIR-V ID. // This is the ID of the OpVariable. - uint32_t id; + ID id; // The type ID of the variable which includes arrays and all type modifications. // This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general // since these modifications typically happen on the base_type_id. - uint32_t type_id; + TypeID type_id; // The base type of the declared resource. // This type is the base type which ignores pointers and arrays of the type_id. // This is mostly useful to parse decorations of the underlying type. // base_type_id can also be obtained with get_type(get_type(type_id).self). - uint32_t base_type_id; + TypeID base_type_id; // The declared name (OpName) of the resource. // For Buffer blocks, the name actually reflects the externally @@ -77,17 +77,17 @@ struct ShaderResources struct CombinedImageSampler { // The ID of the sampler2D variable. - uint32_t combined_id; + VariableID combined_id; // The ID of the texture2D variable. - uint32_t image_id; + VariableID image_id; // The ID of the sampler variable. - uint32_t sampler_id; + VariableID sampler_id; }; struct SpecializationConstant { // The ID of the specialization constant. - uint32_t id; + ConstantID id; // The constant ID of the constant, used in Vulkan during pipeline creation. uint32_t constant_id; }; @@ -117,15 +117,6 @@ struct EntryPoint spv::ExecutionModel execution_model; }; -enum ExtendedDecorations -{ - SPIRVCrossDecorationPacked, - SPIRVCrossDecorationPackedType, - SPIRVCrossDecorationInterfaceMemberIndex, - SPIRVCrossDecorationInterfaceOrigID, - SPIRVCrossDecorationArgumentBufferID -}; - class Compiler { public: @@ -151,81 +142,81 @@ public: virtual std::string compile(); // Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned. - const std::string &get_name(uint32_t id) const; + const std::string &get_name(ID id) const; // Applies a decoration to an ID. Effectively injects OpDecorate. - void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); // Overrides the identifier OpName of an ID. // Identifiers beginning with underscores or identifiers which contain double underscores // are reserved by the implementation. - void set_name(uint32_t id, const std::string &name); + void set_name(ID id, const std::string &name); // Gets a bitmask for the decorations which are applied to ID. // I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar) - const Bitset &get_decoration_bitset(uint32_t id) const; + const Bitset &get_decoration_bitset(ID id) const; // Returns whether the decoration has been applied to the ID. - bool has_decoration(uint32_t id, spv::Decoration decoration) const; + bool has_decoration(ID id, spv::Decoration decoration) const; // Gets the value for decorations which take arguments. // If the decoration is a boolean (i.e. spv::DecorationNonWritable), // 1 will be returned. // If decoration doesn't exist or decoration is not recognized, // 0 will be returned. - uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; // Removes the decoration for an ID. - void unset_decoration(uint32_t id, spv::Decoration decoration); + void unset_decoration(ID id, spv::Decoration decoration); // Gets the SPIR-V type associated with ID. // Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource. - const SPIRType &get_type(uint32_t id) const; + const SPIRType &get_type(TypeID id) const; // Gets the SPIR-V type of a variable. - const SPIRType &get_type_from_variable(uint32_t id) const; + const SPIRType &get_type_from_variable(VariableID id) const; // Gets the underlying storage class for an OpVariable. - spv::StorageClass get_storage_class(uint32_t id) const; + spv::StorageClass get_storage_class(VariableID id) const; // If get_name() is an empty string, get the fallback name which will be used // instead in the disassembled source. - virtual const std::string get_fallback_name(uint32_t id) const; + virtual const std::string get_fallback_name(ID id) const; // If get_name() of a Block struct is an empty string, get the fallback name. // This needs to be per-variable as multiple variables can use the same block type. - virtual const std::string get_block_fallback_name(uint32_t id) const; + virtual const std::string get_block_fallback_name(VariableID id) const; // Given an OpTypeStruct in ID, obtain the identifier for member number "index". // This may be an empty string. - const std::string &get_member_name(uint32_t id, uint32_t index) const; + const std::string &get_member_name(TypeID id, uint32_t index) const; // Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index". - uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; // Sets the member identifier for OpTypeStruct ID, member number "index". - void set_member_name(uint32_t id, uint32_t index, const std::string &name); + void set_member_name(TypeID id, uint32_t index, const std::string &name); // Returns the qualified member identifier for OpTypeStruct ID, member number "index", // or an empty string if no qualified alias exists - const std::string &get_member_qualified_name(uint32_t type_id, uint32_t index) const; + const std::string &get_member_qualified_name(TypeID type_id, uint32_t index) const; // Gets the decoration mask for a member of a struct, similar to get_decoration_mask. - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; // Returns whether the decoration has been applied to a member of a struct. - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; // Similar to set_decoration, but for struct members. - void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); // Unsets a member decoration, similar to unset_decoration. - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); // Gets the fallback name for a member, similar to get_fallback_name. virtual const std::string get_fallback_member_name(uint32_t index) const @@ -237,7 +228,7 @@ public: // SPIR-V shader. The granularity of this analysis is per-member of a struct. // This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks. // ID is the Resource::id obtained from get_shader_resources(). - SmallVector<BufferRange> get_active_buffer_ranges(uint32_t id) const; + SmallVector<BufferRange> get_active_buffer_ranges(VariableID id) const; // Returns the effective size of a buffer block. size_t get_declared_struct_size(const SPIRType &struct_type) const; @@ -255,7 +246,7 @@ public: size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const; // Returns the effective size of a buffer block struct member. - virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; + size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; // Returns a set of all global variables which are statically accessed // by the control flow graph from the current entry point. @@ -265,12 +256,12 @@ public: // // To use the returned set as the filter for which variables are used during compilation, // this set can be moved to set_enabled_interface_variables(). - std::unordered_set<uint32_t> get_active_interface_variables() const; + std::unordered_set<VariableID> get_active_interface_variables() const; // Sets the interface variables which are used during compilation. // By default, all variables are used. // Once set, compile() will only consider the set in active_variables. - void set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables); + void set_enabled_interface_variables(std::unordered_set<VariableID> active_variables); // Query shader resources, use ids with reflection interface to modify or query binding points, etc. ShaderResources get_shader_resources() const; @@ -278,19 +269,19 @@ public: // Query shader resources, but only return the variables which are part of active_variables. // E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically // accessed. - ShaderResources get_shader_resources(const std::unordered_set<uint32_t> &active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set<VariableID> &active_variables) const; // Remapped variables are considered built-in variables and a backend will // not emit a declaration for this variable. // This is mostly useful for making use of builtins which are dependent on extensions. - void set_remapped_variable_state(uint32_t id, bool remap_enable); - bool get_remapped_variable_state(uint32_t id) const; + void set_remapped_variable_state(VariableID id, bool remap_enable); + bool get_remapped_variable_state(VariableID id) const; // For subpassInput variables which are remapped to plain variables, // the number of components in the remapped // variable must be specified as the backing type of subpass inputs are opaque. - void set_subpass_input_remapped_components(uint32_t id, uint32_t components); - uint32_t get_subpass_input_remapped_components(uint32_t id) const; + void set_subpass_input_remapped_components(VariableID id, uint32_t components); + uint32_t get_subpass_input_remapped_components(VariableID id) const; // All operations work on the current entry point. // Entry points can be swapped out with set_entry_point(). @@ -371,7 +362,7 @@ public: // If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile(). // Calling this function also invalidates get_active_interface_variables(), so this should be called // before that function. - uint32_t build_dummy_sampler_for_combined_images(); + VariableID build_dummy_sampler_for_combined_images(); // Analyzes all separate image and samplers used from the currently selected entry point, // and re-routes them all to a combined image sampler instead. @@ -420,8 +411,8 @@ public: // constant_type is the SPIRType for the specialization constant, // which can be queried to determine which fields in the unions should be poked at. SmallVector<SpecializationConstant> get_specialization_constants() const; - SPIRConstant &get_constant(uint32_t id); - const SPIRConstant &get_constant(uint32_t id) const; + SPIRConstant &get_constant(ConstantID id); + const SPIRConstant &get_constant(ConstantID id) const; uint32_t get_current_id_bound() const { @@ -444,7 +435,7 @@ public: // If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true, // otherwise, returns false. // If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false. - bool get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const; + bool get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const; // HLSL counter buffer reflection interface. // Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where @@ -459,7 +450,7 @@ public: // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_is_hlsl_counter_buffer(uint32_t id) const; + bool buffer_is_hlsl_counter_buffer(VariableID id) const; // Queries if a buffer object has a neighbor "counter" buffer. // If so, the ID of that counter buffer will be returned in counter_id. @@ -467,7 +458,7 @@ public: // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const; + bool buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const; // Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module. const SmallVector<spv::Capability> &get_declared_capabilities() const; @@ -488,13 +479,13 @@ public: // ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type. // // This also applies to HLSL cbuffers. - std::string get_remapped_declared_block_name(uint32_t id) const; + std::string get_remapped_declared_block_name(VariableID id) const; // For buffer block variables, get the decorations for that variable. // Sometimes, decorations for buffer blocks are found in member decorations instead // of direct decorations on the variable itself. // The most common use here is to check if a buffer is readonly or writeonly. - Bitset get_buffer_block_flags(uint32_t id) const; + Bitset get_buffer_block_flags(VariableID id) const; protected: const uint32_t *stream(const Instruction &instr) const @@ -518,7 +509,7 @@ protected: SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; - std::unordered_set<uint32_t> active_interface_variables; + std::unordered_set<VariableID> active_interface_variables; bool check_active_interface_variables = false; // If our IDs are out of range here as part of opcodes, throw instead of @@ -558,7 +549,9 @@ protected: template <typename T> const T *maybe_get(uint32_t id) const { - if (ir.ids[id].get_type() == static_cast<Types>(T::type)) + if (id >= ir.ids.size()) + return nullptr; + else if (ir.ids[id].get_type() == static_cast<Types>(T::type)) return &get<T>(id); else return nullptr; @@ -614,6 +607,7 @@ protected: bool expression_is_lvalue(uint32_t id) const; bool variable_storage_is_aliased(const SPIRVariable &var); SPIRVariable *maybe_get_backing_variable(uint32_t chain); + spv::StorageClass get_backing_variable_storage(uint32_t ptr); void register_read(uint32_t expr, uint32_t chain, bool forwarded); void register_write(uint32_t chain); @@ -626,7 +620,7 @@ protected: inline bool is_single_block_loop(uint32_t next) const { auto &block = get<SPIRBlock>(next); - return block.merge == SPIRBlock::MergeLoop && block.continue_block == next; + return block.merge == SPIRBlock::MergeLoop && block.continue_block == ID(next); } inline bool is_break(uint32_t next) const @@ -666,9 +660,9 @@ protected: bool function_is_pure(const SPIRFunction &func); bool block_is_pure(const SPIRBlock &block); - bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to); bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const; + bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const; bool execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const; SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const; @@ -718,6 +712,13 @@ protected: { } + // Called after returning from a function or when entering a block, + // can be called multiple times per block, + // while set_current_block is only called on block entry. + virtual void rearm_current_block(const SPIRBlock &) + { + } + virtual bool begin_function_scope(const uint32_t *, uint32_t) { return true; @@ -749,7 +750,7 @@ protected: struct InterfaceVariableAccessHandler : OpcodeHandler { - InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set<uint32_t> &variables_) + InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set<VariableID> &variables_) : compiler(compiler_) , variables(variables_) { @@ -758,7 +759,7 @@ protected: bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; const Compiler &compiler; - std::unordered_set<uint32_t> &variables; + std::unordered_set<VariableID> &variables; }; struct CombinedImageSamplerHandler : OpcodeHandler @@ -780,8 +781,8 @@ protected: uint32_t remap_parameter(uint32_t id); void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length); void pop_remap_parameters(); - void register_combined_image_sampler(SPIRFunction &caller, uint32_t texture_id, uint32_t sampler_id, - bool depth); + void register_combined_image_sampler(SPIRFunction &caller, VariableID combined_id, VariableID texture_id, + VariableID sampler_id, bool depth); }; struct DummySamplerForCombinedImageHandler : OpcodeHandler @@ -814,7 +815,7 @@ protected: // This must be an ordered data structure so we always pick the same type aliases. SmallVector<uint32_t> global_struct_cache; - ShaderResources get_shader_resources(const std::unordered_set<uint32_t> *active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set<VariableID> *active_variables) const; VariableTypeRemapCallback variable_remap_callback; @@ -822,7 +823,9 @@ protected: std::unordered_set<uint32_t> forced_temporaries; std::unordered_set<uint32_t> forwarded_temporaries; + std::unordered_set<uint32_t> suppressed_usage_tracking; std::unordered_set<uint32_t> hoisted_temporaries; + std::unordered_set<uint32_t> forced_invariant_temporaries; Bitset active_input_builtins; Bitset active_output_builtins; @@ -889,9 +892,12 @@ protected: void build_function_control_flow_graphs_and_analyze(); std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs; + const CFG &get_cfg_for_current_function() const; + const CFG &get_cfg_for_function(uint32_t id) const; + struct CFGBuilder : OpcodeHandler { - CFGBuilder(Compiler &compiler_); + explicit CFGBuilder(Compiler &compiler_); bool follow_function_call(const SPIRFunction &func) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; @@ -936,7 +942,7 @@ protected: struct PhysicalStorageBufferPointerHandler : OpcodeHandler { - PhysicalStorageBufferPointerHandler(Compiler &compiler_); + explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_); bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; Compiler &compiler; std::unordered_set<uint32_t> types; @@ -947,6 +953,62 @@ protected: void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler); void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler, bool single_function); + bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var); + + // Finds all resources that are written to from inside the critical section, if present. + // The critical section is delimited by OpBeginInvocationInterlockEXT and + // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written + // while inside the critical section must be placed in a raster order group. + struct InterlockedResourceAccessHandler : OpcodeHandler + { + InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool in_crit_sec = false; + + uint32_t interlock_function_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + bool use_critical_section = false; + bool call_stack_is_interlocked = false; + SmallVector<uint32_t> call_stack; + + void access_potential_resource(uint32_t id); + }; + + struct InterlockedResourceAccessPrepassHandler : OpcodeHandler + { + InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + void rearm_current_block(const SPIRBlock &block) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t interlock_function_id = 0; + uint32_t current_block_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + SmallVector<uint32_t> call_stack; + }; + + void analyze_interlocked_resource_usage(); + // The set of all resources written while inside the critical section, if present. + std::unordered_set<uint32_t> interlocked_resources; + bool interlocked_is_complex = false; void make_constant_null(uint32_t id, uint32_t type); @@ -972,15 +1034,18 @@ protected: void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration); bool type_is_array_of_pointers(const SPIRType &type) const; + bool type_is_block_like(const SPIRType &type) const; + bool type_is_opaque_value(const SPIRType &type) const; + + bool reflection_ssbo_instance_name_is_significant() const; + std::string get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const; + + bool flush_phi_required(BlockID from, BlockID to) const; private: // Used only to implement the old deprecated get_entry_point() interface. const SPIREntryPoint &get_first_entry_point(const std::string &name) const; SPIREntryPoint &get_first_entry_point(const std::string &name); - - void fixup_type_alias(); - bool type_is_block_like(const SPIRType &type) const; - bool type_is_opaque_value(const SPIRType &type) const; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp index d3352d9..f6e63b4 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp @@ -33,6 +33,11 @@ #if SPIRV_CROSS_C_API_REFLECT #include "spirv_reflect.hpp" #endif + +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION +#include "gitversion.h" +#endif + #include "spirv_parser.hpp" #include <memory> #include <new> @@ -157,7 +162,7 @@ struct spvc_compiler_options_s : ScratchMemoryAllocation struct spvc_set_s : ScratchMemoryAllocation { - std::unordered_set<uint32_t> set; + std::unordered_set<VariableID> set; }; // Dummy-inherit to we can keep our opaque type handle type safe in C-land as well, @@ -412,6 +417,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_FLIP_VERTEX_Y: options->glsl.vertex.flip_vert_y = value != 0; break; + case SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES: + options->glsl.emit_line_directives = value != 0; + break; case SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE: options->glsl.vertex.support_nonzero_base_instance = value != 0; @@ -474,8 +482,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c options->msl.texel_buffer_texture_width = value; break; - case SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX: - options->msl.aux_buffer_index = value; + case SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX: + options->msl.swizzle_buffer_index = value; break; case SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX: @@ -533,6 +541,34 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE: options->msl.texture_buffer_native = value != 0; break; + + case SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX: + options->msl.buffer_size_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW: + options->msl.multiview = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX: + options->msl.view_mask_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX: + options->msl.device_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX: + options->msl.view_index_from_device_index = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE: + options->msl.dispatch_base = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX: + options->msl.dynamic_offsets_buffer_index = value; + break; #endif default: @@ -726,7 +762,7 @@ spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler) #endif } -spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler) +spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler) { #if SPIRV_CROSS_C_API_MSL if (compiler->backend != SPVC_BACKEND_MSL) @@ -736,13 +772,35 @@ spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler) } auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); - return msl.needs_aux_buffer() ? SPVC_TRUE : SPVC_FALSE; + return msl.needs_swizzle_buffer() ? SPVC_TRUE : SPVC_FALSE; #else compiler->context->report_error("MSL function used on a non-MSL backend."); return SPVC_FALSE; #endif } +spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.needs_buffer_size_buffer() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler) +{ + return spvc_compiler_msl_needs_swizzle_buffer(compiler); +} + spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler) { #if SPIRV_CROSS_C_API_MSL @@ -811,7 +869,7 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const attr.msl_stride = va->msl_stride; attr.format = static_cast<MSLVertexFormat>(va->format); attr.builtin = static_cast<spv::BuiltIn>(va->builtin); - attr.per_instance = va->per_instance; + attr.per_instance = va->per_instance != 0; msl.add_msl_vertex_attribute(attr); return SPVC_SUCCESS; #else @@ -848,6 +906,27 @@ spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, #endif } +spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + msl.add_dynamic_buffer(desc_set, binding, index); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + (void)index; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set) { #if SPIRV_CROSS_C_API_MSL @@ -867,6 +946,26 @@ spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler #endif } +spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + msl.set_argument_buffer_device_address_space(desc_set, bool(device_address)); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)device_address; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) { #if SPIRV_CROSS_C_API_MSL @@ -907,38 +1006,140 @@ spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutio #endif } -spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, - const spvc_msl_constexpr_sampler *sampler) -{ #if SPIRV_CROSS_C_API_MSL - if (compiler->backend != SPVC_BACKEND_MSL) - { - compiler->context->report_error("MSL function used on a non-MSL backend."); - return SPVC_ERROR_INVALID_ARGUMENT; - } - - auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); - MSLConstexprSampler samp; +static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_constexpr_sampler *sampler) +{ samp.s_address = static_cast<MSLSamplerAddress>(sampler->s_address); samp.t_address = static_cast<MSLSamplerAddress>(sampler->t_address); samp.r_address = static_cast<MSLSamplerAddress>(sampler->r_address); samp.lod_clamp_min = sampler->lod_clamp_min; samp.lod_clamp_max = sampler->lod_clamp_max; - samp.lod_clamp_enable = sampler->lod_clamp_enable; + samp.lod_clamp_enable = sampler->lod_clamp_enable != 0; samp.min_filter = static_cast<MSLSamplerFilter>(sampler->min_filter); samp.mag_filter = static_cast<MSLSamplerFilter>(sampler->mag_filter); samp.mip_filter = static_cast<MSLSamplerMipFilter>(sampler->mip_filter); - samp.compare_enable = sampler->compare_enable; - samp.anisotropy_enable = sampler->anisotropy_enable; + samp.compare_enable = sampler->compare_enable != 0; + samp.anisotropy_enable = sampler->anisotropy_enable != 0; samp.max_anisotropy = sampler->max_anisotropy; samp.compare_func = static_cast<MSLSamplerCompareFunc>(sampler->compare_func); samp.coord = static_cast<MSLSamplerCoord>(sampler->coord); samp.border_color = static_cast<MSLSamplerBorderColor>(sampler->border_color); +} + +static void spvc_convert_msl_sampler_ycbcr_conversion(MSLConstexprSampler &samp, const spvc_msl_sampler_ycbcr_conversion *conv) +{ + samp.ycbcr_conversion_enable = conv != nullptr; + if (conv == nullptr) return; + samp.planes = conv->planes; + samp.resolution = static_cast<MSLFormatResolution>(conv->resolution); + samp.chroma_filter = static_cast<MSLSamplerFilter>(conv->chroma_filter); + samp.x_chroma_offset = static_cast<MSLChromaLocation>(conv->x_chroma_offset); + samp.y_chroma_offset = static_cast<MSLChromaLocation>(conv->y_chroma_offset); + for (int i = 0; i < 4; i++) + samp.swizzle[i] = static_cast<MSLComponentSwizzle>(conv->swizzle[i]); + samp.ycbcr_model = static_cast<MSLSamplerYCbCrModelConversion>(conv->ycbcr_model); + samp.ycbcr_range = static_cast<MSLSamplerYCbCrRange>(conv->ycbcr_range); + samp.bpc = conv->bpc; +} +#endif + +spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + msl.remap_constexpr_sampler(id, samp); + return SPVC_SUCCESS; +#else + (void)id; + (void)sampler; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, + unsigned desc_set, unsigned binding, + const spvc_msl_constexpr_sampler *sampler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)binding; + (void)sampler; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); msl.remap_constexpr_sampler(id, samp); return SPVC_SUCCESS; #else (void)id; (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, + unsigned desc_set, unsigned binding, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)binding; + (void)sampler; + (void)conv; compiler->context->report_error("MSL function used on a non-MSL backend."); return SPVC_ERROR_INVALID_ARGUMENT; #endif @@ -965,6 +1166,42 @@ spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compi #endif } +unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return uint32_t(-1); + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.get_automatic_msl_resource_binding(id); +#else + (void)id; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return uint32_t(-1); +#endif +} + +unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return uint32_t(-1); + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.get_automatic_msl_resource_binding_secondary(id); +#else + (void)id; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return uint32_t(-1); +#endif +} + spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source) { SPVC_BEGIN_SAFE_SCOPE @@ -1276,6 +1513,11 @@ const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, s .c_str(); } +const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index) +{ + return compiler->compiler->get_member_name(id, member_index).c_str(); +} + spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, const spvc_entry_point **entry_points, size_t *num_entry_points) { @@ -1413,7 +1655,7 @@ unsigned spvc_type_get_bit_width(spvc_type type) return type->width; } -unsigned spvc_type_get_SmallVector_size(spvc_type type) +unsigned spvc_type_get_vector_size(spvc_type type) { return type->vecsize; } @@ -1516,6 +1758,16 @@ spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler c return SPVC_SUCCESS; } +spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *size = compiler->compiler->get_declared_struct_member_size(*static_cast<const SPIRType *>(struct_type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset) { SPVC_BEGIN_SAFE_SCOPE @@ -1643,6 +1895,32 @@ spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc return ret; } +spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler, + spvc_variable_id id, + const spvc_buffer_range **ranges, + size_t *num_ranges) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto active_ranges = compiler->compiler->get_active_buffer_ranges(id); + SmallVector<spvc_buffer_range> translated; + translated.reserve(active_ranges.size()); + for (auto &r : active_ranges) + { + spvc_buffer_range trans = { r.index, r.offset, r.range }; + translated.push_back(trans); + } + + auto ptr = spvc_allocate<TemporaryBuffer<spvc_buffer_range>>(); + ptr->buffer = std::move(translated); + *ranges = ptr->buffer.data(); + *num_ranges = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row) { return constant->scalar_f16(column, row); @@ -1854,6 +2132,24 @@ void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) #endif } +void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + MSLConstexprSampler defaults; + conv->planes = defaults.planes; + conv->resolution = static_cast<spvc_msl_format_resolution>(defaults.resolution); + conv->chroma_filter = static_cast<spvc_msl_sampler_filter>(defaults.chroma_filter); + conv->x_chroma_offset = static_cast<spvc_msl_chroma_location>(defaults.x_chroma_offset); + conv->y_chroma_offset = static_cast<spvc_msl_chroma_location>(defaults.y_chroma_offset); + for (int i = 0; i < 4; i++) + conv->swizzle[i] = static_cast<spvc_msl_component_swizzle>(defaults.swizzle[i]); + conv->ycbcr_model = static_cast<spvc_msl_sampler_ycbcr_model_conversion>(defaults.ycbcr_model); + conv->ycbcr_range = static_cast<spvc_msl_sampler_ycbcr_range>(defaults.ycbcr_range); +#else + memset(conv, 0, sizeof(*conv)); +#endif +} + unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler) { return compiler->compiler->get_current_id_bound(); @@ -1866,6 +2162,15 @@ void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch) *patch = SPVC_C_API_VERSION_PATCH; } +const char *spvc_get_commit_revision_and_timestamp(void) +{ +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION + return SPIRV_CROSS_GIT_REVISION; +#else + return ""; +#endif +} + #ifdef _MSC_VER #pragma warning(pop) #endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h index 9e10d07..f950803 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h @@ -33,7 +33,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 7 +#define SPVC_C_API_VERSION_MINOR 19 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -58,6 +58,9 @@ extern "C" { */ SPVC_PUBLIC_API void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch); +/* Gets a human readable version string to identify which commit a particular binary was created from. */ +SPVC_PUBLIC_API const char *spvc_get_commit_revision_and_timestamp(void); + /* These types are opaque to the user. */ typedef struct spvc_context_s *spvc_context; typedef struct spvc_parsed_ir_s *spvc_parsed_ir; @@ -112,6 +115,14 @@ typedef struct spvc_specialization_constant } spvc_specialization_constant; /* See C++ API. */ +typedef struct spvc_buffer_range +{ + unsigned index; + size_t offset; + size_t range; +} spvc_buffer_range; + +/* See C++ API. */ typedef struct spvc_hlsl_root_constants { unsigned start; @@ -290,9 +301,14 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b #define SPVC_MSL_PUSH_CONSTANT_DESC_SET (~(0u)) #define SPVC_MSL_PUSH_CONSTANT_BINDING (0) +#define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u)) +#define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u)) +#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u)) + +/* Obsolete. Sticks around for backwards compatibility. */ #define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1 -/* Runtime check for incompatibility. */ +/* Runtime check for incompatibility. Obsolete. */ SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void); /* Maps to C++ API. */ @@ -355,6 +371,55 @@ typedef enum spvc_msl_sampler_border_color } spvc_msl_sampler_border_color; /* Maps to C++ API. */ +typedef enum spvc_msl_format_resolution +{ + SPVC_MSL_FORMAT_RESOLUTION_444 = 0, + SPVC_MSL_FORMAT_RESOLUTION_422, + SPVC_MSL_FORMAT_RESOLUTION_420, + SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +} spvc_msl_format_resolution; + +/* Maps to C++ API. */ +typedef enum spvc_msl_chroma_location +{ + SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + SPVC_MSL_CHROMA_LOCATION_MIDPOINT, + SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +} spvc_msl_chroma_location; + +/* Maps to C++ API. */ +typedef enum spvc_msl_component_swizzle +{ + SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + SPVC_MSL_COMPONENT_SWIZZLE_ZERO, + SPVC_MSL_COMPONENT_SWIZZLE_ONE, + SPVC_MSL_COMPONENT_SWIZZLE_R, + SPVC_MSL_COMPONENT_SWIZZLE_G, + SPVC_MSL_COMPONENT_SWIZZLE_B, + SPVC_MSL_COMPONENT_SWIZZLE_A, + SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +} spvc_msl_component_swizzle; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_ycbcr_model_conversion +{ + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_model_conversion; + +/* Maps to C+ API. */ +typedef enum spvc_msl_sampler_ycbcr_range +{ + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_range; + +/* Maps to C++ API. */ typedef struct spvc_msl_constexpr_sampler { spvc_msl_sampler_coord coord; @@ -381,6 +446,26 @@ typedef struct spvc_msl_constexpr_sampler */ SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); +/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. */ +typedef struct spvc_msl_sampler_ycbcr_conversion +{ + unsigned planes; + spvc_msl_format_resolution resolution; + spvc_msl_sampler_filter chroma_filter; + spvc_msl_chroma_location x_chroma_offset; + spvc_msl_chroma_location y_chroma_offset; + spvc_msl_component_swizzle swizzle[4]; + spvc_msl_sampler_ycbcr_model_conversion ycbcr_model; + spvc_msl_sampler_ycbcr_range ycbcr_range; + unsigned bpc; +} spvc_msl_sampler_ycbcr_conversion; + +/* + * Initializes the constexpr sampler struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); + /* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */ typedef enum spvc_compiler_option { @@ -407,7 +492,11 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT, + + /* Obsolete, use SWIZZLE_BUFFER_INDEX instead. */ SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT, @@ -428,6 +517,17 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS = 35 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX = 36 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; @@ -505,7 +605,12 @@ SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin * Maps to C++ API. */ SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler); + +/* Obsolete. Renamed to needs_swizzle_buffer. */ SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler); SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler); SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler); @@ -514,14 +619,23 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, const spvc_msl_resource_binding *binding); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address); SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location); SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, unsigned binding); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components); +SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id); + +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index); + /* * Reflect resources. * Maps almost 1:1 to C++ API. @@ -567,6 +681,7 @@ SPVC_PUBLIC_API unsigned spvc_compiler_get_member_decoration(spvc_compiler compi unsigned member_index, SpvDecoration decoration); SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id, unsigned member_index, SpvDecoration decoration); +SPVC_PUBLIC_API const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index); /* * Entry points. @@ -626,6 +741,7 @@ SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_typ SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size); SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler, spvc_type struct_type, size_t array_size, size_t *size); +SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size); SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset); @@ -660,6 +776,15 @@ SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specializatio spvc_specialization_constant *z); /* + * Buffer ranges + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler, + spvc_variable_id id, + const spvc_buffer_range **ranges, + size_t *num_ranges); + +/* * No stdint.h until C99, sigh :( * For smaller types, the result is sign or zero-extended as appropriate. * Maps to C++ API. diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp index 393f461..7b507e1 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp @@ -61,7 +61,8 @@ public: private: #if defined(_MSC_VER) && _MSC_VER < 1900 // MSVC 2013 workarounds, sigh ... - union { + union + { char aligned_char[sizeof(T) * N]; double dummy_aligner; } u; @@ -447,6 +448,11 @@ public: } } + void insert(T *itr, const T &value) + { + insert(itr, &value, &value + 1); + } + T *erase(T *itr) { std::move(itr + 1, this->end(), itr); diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp index 108000c..2387267 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp @@ -41,6 +41,7 @@ ParsedIR::ParsedIR() pool_group->pools[TypeCombinedImageSampler].reset(new ObjectPool<SPIRCombinedImageSampler>); pool_group->pools[TypeAccessChain].reset(new ObjectPool<SPIRAccessChain>); pool_group->pools[TypeUndef].reset(new ObjectPool<SPIRUndef>); + pool_group->pools[TypeString].reset(new ObjectPool<SPIRString>); } // Should have been default-implemented, but need this on MSVC 2013. @@ -71,7 +72,8 @@ ParsedIR &ParsedIR::operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT default_entry_point = other.default_entry_point; source = other.source; - loop_iteration_depth = other.loop_iteration_depth; + loop_iteration_depth_hard = other.loop_iteration_depth_hard; + loop_iteration_depth_soft = other.loop_iteration_depth_soft; } return *this; } @@ -99,7 +101,8 @@ ParsedIR &ParsedIR::operator=(const ParsedIR &other) entry_points = other.entry_points; default_entry_point = other.default_entry_point; source = other.source; - loop_iteration_depth = other.loop_iteration_depth; + loop_iteration_depth_hard = other.loop_iteration_depth_hard; + loop_iteration_depth_soft = other.loop_iteration_depth_soft; addressing_model = other.addressing_model; memory_model = other.memory_model; @@ -159,7 +162,7 @@ static string ensure_valid_identifier(const string &name, bool member) return str; } -const string &ParsedIR::get_name(uint32_t id) const +const string &ParsedIR::get_name(ID id) const { auto *m = find_meta(id); if (m) @@ -168,7 +171,7 @@ const string &ParsedIR::get_name(uint32_t id) const return empty_string; } -const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const +const string &ParsedIR::get_member_name(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -181,7 +184,7 @@ const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const return empty_string; } -void ParsedIR::set_name(uint32_t id, const string &name) +void ParsedIR::set_name(ID id, const string &name) { auto &str = meta[id].decoration.alias; str.clear(); @@ -196,7 +199,7 @@ void ParsedIR::set_name(uint32_t id, const string &name) str = ensure_valid_identifier(name, false); } -void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name) +void ParsedIR::set_member_name(TypeID id, uint32_t index, const string &name) { meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); @@ -212,7 +215,7 @@ void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name) str = ensure_valid_identifier(name, true); } -void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const string &argument) +void ParsedIR::set_decoration_string(ID id, Decoration decoration, const string &argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -228,7 +231,7 @@ void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const s } } -void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -294,7 +297,7 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum } } -void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); auto &dec = meta[id].members[index]; @@ -342,7 +345,7 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec // Recursively marks any constants referenced by the specified constant instruction as being used // as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp). -void ParsedIR::mark_used_as_array_length(uint32_t id) +void ParsedIR::mark_used_as_array_length(ID id) { switch (ids[id].get_type()) { @@ -353,8 +356,16 @@ void ParsedIR::mark_used_as_array_length(uint32_t id) case TypeConstantOp: { auto &cop = get<SPIRConstantOp>(id); - for (uint32_t arg_id : cop.arguments) - mark_used_as_array_length(arg_id); + if (cop.opcode == OpCompositeExtract) + mark_used_as_array_length(cop.arguments[0]); + else if (cop.opcode == OpCompositeInsert) + { + mark_used_as_array_length(cop.arguments[0]); + mark_used_as_array_length(cop.arguments[1]); + } + else + for (uint32_t arg_id : cop.arguments) + mark_used_as_array_length(arg_id); break; } @@ -390,7 +401,7 @@ Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const return base_flags; } -const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &ParsedIR::get_member_decoration_bitset(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -403,12 +414,12 @@ const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index return cleared_bitset; } -bool ParsedIR::has_decoration(uint32_t id, Decoration decoration) const +bool ParsedIR::has_decoration(ID id, Decoration decoration) const { return get_decoration_bitset(id).get(decoration); } -uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const +uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -449,7 +460,7 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const } } -const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration) const +const string &ParsedIR::get_decoration_string(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -470,7 +481,7 @@ const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration } } -void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) +void ParsedIR::unset_decoration(ID id, Decoration decoration) { auto &dec = meta[id].decoration; dec.decoration_flags.clear(decoration); @@ -532,12 +543,12 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) } } -bool ParsedIR::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool ParsedIR::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return get_member_decoration_bitset(id, index).get(decoration); } -uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -571,7 +582,7 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration } } -const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const +const Bitset &ParsedIR::get_decoration_bitset(ID id) const { auto *m = find_meta(id); if (m) @@ -583,7 +594,7 @@ const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const return cleared_bitset; } -void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration, const string &argument) +void ParsedIR::set_member_decoration_string(TypeID id, uint32_t index, Decoration decoration, const string &argument) { meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); auto &dec = meta[id].members[index]; @@ -600,7 +611,7 @@ void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decorat } } -const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &ParsedIR::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (m) @@ -623,7 +634,7 @@ const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index return empty_string; } -void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { auto &m = meta[id]; if (index >= m.members.size()) @@ -676,7 +687,7 @@ uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount) return uint32_t(curr_bound); } -void ParsedIR::remove_typed_id(Types type, uint32_t id) +void ParsedIR::remove_typed_id(Types type, ID id) { auto &type_ids = ids_for_type[type]; type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids)); @@ -691,11 +702,18 @@ void ParsedIR::reset_all_of_type(Types type) ids_for_type[type].clear(); } -void ParsedIR::add_typed_id(Types type, uint32_t id) +void ParsedIR::add_typed_id(Types type, ID id) { - if (loop_iteration_depth) + if (loop_iteration_depth_hard != 0) SPIRV_CROSS_THROW("Cannot add typed ID while looping over it."); + if (loop_iteration_depth_soft != 0) + { + if (!ids[id].empty()) + SPIRV_CROSS_THROW("Cannot override IDs when loop is soft locked."); + return; + } + if (ids[id].empty() || ids[id].get_type() != type) { switch (type) @@ -730,7 +748,7 @@ void ParsedIR::add_typed_id(Types type, uint32_t id) } } -const Meta *ParsedIR::find_meta(uint32_t id) const +const Meta *ParsedIR::find_meta(ID id) const { auto itr = meta.find(id); if (itr != end(meta)) @@ -739,7 +757,7 @@ const Meta *ParsedIR::find_meta(uint32_t id) const return nullptr; } -Meta *ParsedIR::find_meta(uint32_t id) +Meta *ParsedIR::find_meta(ID id) { auto itr = meta.find(id); if (itr != end(meta)) @@ -748,4 +766,41 @@ Meta *ParsedIR::find_meta(uint32_t id) return nullptr; } +ParsedIR::LoopLock ParsedIR::create_loop_hard_lock() const +{ + return ParsedIR::LoopLock(&loop_iteration_depth_hard); +} + +ParsedIR::LoopLock ParsedIR::create_loop_soft_lock() const +{ + return ParsedIR::LoopLock(&loop_iteration_depth_soft); +} + +ParsedIR::LoopLock::~LoopLock() +{ + if (lock) + (*lock)--; +} + +ParsedIR::LoopLock::LoopLock(uint32_t *lock_) + : lock(lock_) +{ + if (lock) + (*lock)++; +} + +ParsedIR::LoopLock::LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT +{ + *this = move(other); +} + +ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT +{ + if (lock) + (*lock)--; + lock = other.lock; + other.lock = nullptr; + return *this; +} + } // namespace SPIRV_CROSS_NAMESPACE diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp index 79e9e15..97bc9ea 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp @@ -57,19 +57,19 @@ public: SmallVector<Variant> ids; // Various meta data for IDs, decorations, names, etc. - std::unordered_map<uint32_t, Meta> meta; + std::unordered_map<ID, Meta> meta; // Holds all IDs which have a certain type. // This is needed so we can iterate through a specific kind of resource quickly, // and in-order of module declaration. - SmallVector<uint32_t> ids_for_type[TypeCount]; + SmallVector<ID> ids_for_type[TypeCount]; // Special purpose lists which contain a union of types. // This is needed so we can declare specialization constants and structs in an interleaved fashion, // among other things. // Constants can be of struct type, and struct array sizes can use specialization constants. - SmallVector<uint32_t> ids_for_constant_or_type; - SmallVector<uint32_t> ids_for_constant_or_variable; + SmallVector<ID> ids_for_constant_or_type; + SmallVector<ID> ids_for_constant_or_variable; // Declared capabilities and extensions in the SPIR-V module. // Not really used except for reflection at the moment. @@ -88,12 +88,12 @@ public: }; using BlockMetaFlags = uint8_t; SmallVector<BlockMetaFlags> block_meta; - std::unordered_map<uint32_t, uint32_t> continue_block_to_loop_header; + std::unordered_map<BlockID, BlockID> continue_block_to_loop_header; // Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction. // Entry points can therefore be seen as some sort of meta structure. - std::unordered_map<uint32_t, SPIREntryPoint> entry_points; - uint32_t default_entry_point = 0; + std::unordered_map<FunctionID, SPIREntryPoint> entry_points; + FunctionID default_entry_point = 0; struct Source { @@ -114,50 +114,75 @@ public: // Can be useful for simple "raw" reflection. // However, most members are here because the Parser needs most of these, // and might as well just have the whole suite of decoration/name handling in one place. - void set_name(uint32_t id, const std::string &name); - const std::string &get_name(uint32_t id) const; - void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); - bool has_decoration(uint32_t id, spv::Decoration decoration) const; - uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; - const Bitset &get_decoration_bitset(uint32_t id) const; - void unset_decoration(uint32_t id, spv::Decoration decoration); + void set_name(ID id, const std::string &name); + const std::string &get_name(ID id) const; + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); + bool has_decoration(ID id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; + const Bitset &get_decoration_bitset(ID id) const; + void unset_decoration(ID id, spv::Decoration decoration); // Decoration handling methods (for members of a struct). - void set_member_name(uint32_t id, uint32_t index, const std::string &name); - const std::string &get_member_name(uint32_t id, uint32_t index) const; - void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_name(TypeID id, uint32_t index, const std::string &name); + const std::string &get_member_name(TypeID id, uint32_t index) const; + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); - uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); - void mark_used_as_array_length(uint32_t id); + void mark_used_as_array_length(ID id); uint32_t increase_bound_by(uint32_t count); Bitset get_buffer_block_flags(const SPIRVariable &var) const; - void add_typed_id(Types type, uint32_t id); - void remove_typed_id(Types type, uint32_t id); + void add_typed_id(Types type, ID id); + void remove_typed_id(Types type, ID id); + + class LoopLock + { + public: + explicit LoopLock(uint32_t *counter); + LoopLock(const LoopLock &) = delete; + void operator=(const LoopLock &) = delete; + LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT; + LoopLock &operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT; + ~LoopLock(); + + private: + uint32_t *lock; + }; + + // This must be held while iterating over a type ID array. + // It is undefined if someone calls set<>() while we're iterating over a data structure, so we must + // make sure that this case is avoided. + + // If we have a hard lock, it is an error to call set<>(), and an exception is thrown. + // If we have a soft lock, we silently ignore any additions to the typed arrays. + // This should only be used for physical ID remapping where we need to create an ID, but we will never + // care about iterating over them. + LoopLock create_loop_hard_lock() const; + LoopLock create_loop_soft_lock() const; template <typename T, typename Op> void for_each_typed_id(const Op &op) { - loop_iteration_depth++; + auto loop_lock = create_loop_hard_lock(); for (auto &id : ids_for_type[T::type]) { if (ids[id].get_type() == static_cast<Types>(T::type)) op(id, get<T>(id)); } - loop_iteration_depth--; } template <typename T, typename Op> void for_each_typed_id(const Op &op) const { + auto loop_lock = create_loop_hard_lock(); for (auto &id : ids_for_type[T::type]) { if (ids[id].get_type() == static_cast<Types>(T::type)) @@ -173,8 +198,8 @@ public: void reset_all_of_type(Types type); - Meta *find_meta(uint32_t id); - const Meta *find_meta(uint32_t id) const; + Meta *find_meta(ID id); + const Meta *find_meta(ID id) const; const std::string &get_empty_string() const { @@ -194,7 +219,8 @@ private: return variant_get<T>(ids[id]); } - uint32_t loop_iteration_depth = 0; + mutable uint32_t loop_iteration_depth_hard = 0; + mutable uint32_t loop_iteration_depth_soft = 0; std::string empty_string; Bitset cleared_bitset; }; diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp index 32582fb..bcdcd5f 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp @@ -288,8 +288,17 @@ static uint32_t pls_format_to_components(PlsFormat format) static const char *vector_swizzle(int vecsize, int index) { - static const char *swizzle[4][4] = { - { ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw" }, { ".xyz", ".yzw" }, { "" } + static const char *const swizzle[4][4] = { + { ".x", ".y", ".z", ".w" }, + { ".xy", ".yz", ".zw", nullptr }, + { ".xyz", ".yzw", nullptr, nullptr }, +#if defined(__GNUC__) && (__GNUC__ == 9) + // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947. + // This array ends up being compiled as all nullptrs, tripping the assertions below. + { "", nullptr, nullptr, "$" }, +#else + { "", nullptr, nullptr, nullptr }, +#endif }; assert(vecsize >= 1 && vecsize <= 4); @@ -313,6 +322,7 @@ void CompilerGLSL::reset() // Clear temporary usage tracking. expression_usage_counts.clear(); forwarded_temporaries.clear(); + suppressed_usage_tracking.clear(); reset_name_caches(); @@ -494,11 +504,14 @@ string CompilerGLSL::compile() backend.supports_extensions = true; // Scan the SPIR-V to find trivial uses of extensions. + fixup_type_alias(); + reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); find_static_extensions(); fixup_image_load_store_access(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); // Shaders might cast unrelated data to pointers of non-block types. // Find all such instances and make sure we can cast the pointers to a synthesized block type. @@ -523,6 +536,25 @@ string CompilerGLSL::compile() pass_count++; } while (is_forcing_recompilation()); + // Implement the interlocked wrapper function at the end. + // The body was implemented in lieu of main(). + if (interlocked_is_complex) + { + statement("void main()"); + begin_scope(); + statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + statement("spvMainInterlockedBody();"); + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + end_scope(); + } + // Entry point in GLSL is always main(). get_entry_point().name = "main"; @@ -589,6 +621,30 @@ void CompilerGLSL::emit_header() require_extension_internal("GL_ARB_shader_image_load_store"); } + // Needed for: layout(post_depth_coverage) in; + if (execution.flags.get(ExecutionModePostDepthCoverage)) + require_extension_internal("GL_ARB_post_depth_coverage"); + + // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || + execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + { + if (options.es) + { + if (options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); + require_extension_internal("GL_NV_fragment_shader_interlock"); + } + else + { + if (options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + require_extension_internal("GL_ARB_fragment_shader_interlock"); + } + } + for (auto &ext : forced_extensions) { if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") @@ -624,6 +680,19 @@ void CompilerGLSL::emit_header() statement("#endif"); } } + else if (ext == "GL_ARB_post_depth_coverage") + { + if (options.es) + statement("#extension GL_EXT_post_depth_coverage : require"); + else + { + statement("#if defined(GL_ARB_post_depth_coverge)"); + statement("#extension GL_ARB_post_depth_coverage : require"); + statement("#else"); + statement("#extension GL_EXT_post_depth_coverage : require"); + statement("#endif"); + } + } else statement("#extension ", ext, " : require"); } @@ -698,7 +767,8 @@ void CompilerGLSL::emit_header() // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro // declarations before we can emit the work group size. - if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0))) + if (options.vulkan_semantics || + ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) build_workgroup_size(inputs, wg_x, wg_y, wg_z); } else @@ -752,6 +822,17 @@ void CompilerGLSL::emit_header() if (execution.flags.get(ExecutionModeEarlyFragmentTests)) inputs.push_back("early_fragment_tests"); + if (execution.flags.get(ExecutionModePostDepthCoverage)) + inputs.push_back("post_depth_coverage"); + + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) + inputs.push_back("pixel_interlock_ordered"); + else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) + inputs.push_back("pixel_interlock_unordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) + inputs.push_back("sample_interlock_ordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + inputs.push_back("sample_interlock_unordered"); if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) statement("layout(depth_greater) out float gl_FragDepth;"); @@ -784,7 +865,8 @@ void CompilerGLSL::emit_struct(SPIRType &type) // Type-punning with these types is legal, which complicates things // when we are storing struct and array types in an SSBO for example. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. - if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return; add_resource_name(type.self); @@ -812,6 +894,9 @@ void CompilerGLSL::emit_struct(SPIRType &type) emitted = true; } + if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget)) + emit_struct_padding_target(type); + end_scope_decl(); if (emitted) @@ -821,8 +906,6 @@ void CompilerGLSL::emit_struct(SPIRType &type) string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) { string res; - if (flags.get(DecorationNonUniformEXT)) - res += "nonuniformEXT "; //if (flags & (1ull << DecorationSmooth)) // res += "smooth "; if (flags.get(DecorationFlat)) @@ -902,7 +985,8 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. // This is only done selectively in GLSL as needed. - if (has_extended_decoration(type.self, SPIRVCrossDecorationPacked) && dec.decoration_flags.get(DecorationOffset)) + if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) && + dec.decoration_flags.get(DecorationOffset)) attr.push_back(join("offset = ", dec.offset)); if (attr.empty()) @@ -1322,14 +1406,20 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin // Only care about packing if we are in the given range if (offset >= start_offset) { + uint32_t actual_offset = type_struct_member_offset(type, i); + // We only care about offsets in std140, std430, etc ... // For EnhancedLayout variants, we have the flexibility to choose our own offsets. if (!packing_has_flexible_offset(packing)) { - uint32_t actual_offset = type_struct_member_offset(type, i); if (actual_offset != offset) // This cannot be the packing we're looking for. return false; } + else if ((actual_offset & (alignment - 1)) != 0) + { + // We still need to verify that alignment rules are observed, even if we have explicit offset. + return false; + } // Verify array stride rules. if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) != @@ -1408,6 +1498,8 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (options.vulkan_semantics && var.storage == StorageClassPushConstant) attr.push_back("push_constant"); + else if (var.storage == StorageClassShaderRecordBufferNV) + attr.push_back("shaderRecordNV"); if (flags.get(DecorationRowMajor)) attr.push_back("row_major"); @@ -1453,14 +1545,14 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) // Do not emit set = decoration in regular GLSL output, but // we need to preserve it in Vulkan GLSL mode. - if (var.storage != StorageClassPushConstant) + if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV) { if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) attr.push_back(join("set = ", dec.set)); } bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; - bool ssbo_block = var.storage == StorageClassStorageBuffer || + bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); @@ -1482,6 +1574,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (!can_use_buffer_blocks && var.storage == StorageClassUniform) can_use_binding = false; + if (var.storage == StorageClassShaderRecordBufferNV) + can_use_binding = false; + if (can_use_binding && flags.get(DecorationBinding)) attr.push_back(join("binding = ", dec.binding)); @@ -1517,9 +1612,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) return res; } -string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check_std430) +string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout) { - if (check_std430 && buffer_is_packing_standard(type, BufferPackingStd430)) + if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430)) return "std430"; else if (buffer_is_packing_standard(type, BufferPackingStd140)) return "std140"; @@ -1528,7 +1623,8 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } - else if (check_std430 && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) + else if (support_std430_without_scalar_layout && + buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " @@ -1536,7 +1632,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std430"; } else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) @@ -1550,15 +1646,30 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std140"; } else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) { - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } + else if (!support_std430_without_scalar_layout && options.vulkan_semantics && + buffer_is_packing_standard(type, BufferPackingStd430)) + { + // UBOs can support std430 with GL_EXT_scalar_block_layout. + require_extension_internal("GL_EXT_scalar_block_layout"); + return "std430"; + } + else if (!support_std430_without_scalar_layout && options.vulkan_semantics && + buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) + { + // UBOs can support std430 with GL_EXT_scalar_block_layout. + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + require_extension_internal("GL_EXT_scalar_block_layout"); + return "std430"; + } else { SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced " @@ -1727,7 +1838,7 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) auto &type = get<SPIRType>(var.basetype); Bitset flags = ir.get_buffer_block_flags(var); - bool ssbo = var.storage == StorageClassStorageBuffer || + bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); bool is_restrict = ssbo && flags.get(DecorationRestrict); bool is_writeonly = ssbo && flags.get(DecorationNonReadable); @@ -1844,6 +1955,14 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) { return "hitAttributeNV "; } + else if (var.storage == StorageClassCallableDataNV) + { + return "callableDataNV "; + } + else if (var.storage == StorageClassIncomingCallableDataNV) + { + return "callableDataInNV "; + } return ""; } @@ -2015,7 +2134,7 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) auto name = to_name(constant.self); SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // This specialization constant is implicitly declared by emitting layout() in; if (constant.self == workgroup_size_id) @@ -2024,7 +2143,8 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) // These specialization constants are implicitly declared by emitting layout() in; // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration // later can use macro overrides for work group size. - bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id; + bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || + ConstantID(constant.self) == wg_z.id; if (options.vulkan_semantics && is_workgroup_size_constant) { @@ -2374,7 +2494,7 @@ void CompilerGLSL::declare_undefined_values() bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const { - bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable; + bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; if (statically_assigned) { @@ -2446,44 +2566,47 @@ void CompilerGLSL::emit_resources() // emit specialization constants as actual floats, // spec op expressions will redirect to the constant name. // - for (auto &id_ : ir.ids_for_constant_or_type) { - auto &id = ir.ids[id_]; - - if (id.get_type() == TypeConstant) + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_or_type) { - auto &c = id.get<SPIRConstant>(); - - bool needs_declaration = c.specialization || c.is_used_as_lut; + auto &id = ir.ids[id_]; - if (needs_declaration) + if (id.get_type() == TypeConstant) { - if (!options.vulkan_semantics && c.specialization) + auto &c = id.get<SPIRConstant>(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) { - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + emitted = true; } - emit_constant(c); + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get<SPIRConstantOp>()); emitted = true; } - } - else if (id.get_type() == TypeConstantOp) - { - emit_specialization_constant_op(id.get<SPIRConstantOp>()); - emitted = true; - } - else if (id.get_type() == TypeType) - { - auto &type = id.get<SPIRType>(); - if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && - (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && - !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + else if (id.get_type() == TypeType) { - if (emitted) - statement(""); - emitted = false; + auto &type = id.get<SPIRType>(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && + !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + if (emitted) + statement(""); + emitted = false; - emit_struct(type); + emit_struct(type); + } } } } @@ -2500,7 +2623,7 @@ void CompilerGLSL::emit_resources() SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); - if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0)) + if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) { SmallVector<string> inputs; build_workgroup_size(inputs, wg_x, wg_y, wg_z); @@ -2545,7 +2668,8 @@ void CompilerGLSL::emit_resources() ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { auto &type = this->get<SPIRType>(var.basetype); - bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || + type.storage == StorageClassShaderRecordBufferNV; bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); @@ -2585,8 +2709,9 @@ void CompilerGLSL::emit_resources() if (var.storage != StorageClassFunction && type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || - type.storage == StorageClassRayPayloadNV || type.storage == StorageClassHitAttributeNV || - type.storage == StorageClassIncomingRayPayloadNV) && + type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV || + type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV || + type.storage == StorageClassHitAttributeNV) && !is_hidden_variable(var)) { emit_uniform(var); @@ -2646,7 +2771,7 @@ void CompilerGLSL::emit_resources() // Returns a string representation of the ID, usable as a function arg. // Default is to simply return the expression representation fo the arg ID. // Subclasses may override to modify the return value. -string CompilerGLSL::to_func_call_arg(uint32_t id) +string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) { // Make sure that we use the name of the original variable, and not the parameter alias. uint32_t name_id = id; @@ -2667,8 +2792,8 @@ void CompilerGLSL::handle_invalid_expression(uint32_t id) // Converts the format of the current expression from packed to unpacked, // by wrapping the expression in a constructor of the appropriate type. // GLSL does not support packed formats, so simply return the expression. -// Subclasses that do will override -string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t) +// Subclasses that do will override. +string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) { return expr_str; } @@ -2762,13 +2887,22 @@ string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std string CompilerGLSL::address_of_expression(const std::string &expr) { - // If this expression starts with a dereference operator ('*'), then - // just return the part after the operator. - // TODO: Strip parens if unnecessary? - if (expr.front() == '*') + if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') + { + // If we have an expression which looks like (*foo), taking the address of it is the same as stripping + // the first two and last characters. We might have to enclose the expression. + // This doesn't work for cases like (*foo + 10), + // but this is an r-value expression which we cannot take the address of anyways. + return enclose_expression(expr.substr(2, expr.size() - 3)); + } + else if (expr.front() == '*') + { + // If this expression starts with a dereference operator ('*'), then + // just return the part after the operator. return expr.substr(1); + } else - return join('&', expr); + return join('&', enclose_expression(expr)); } // Just like to_expression except that we enclose the expression inside parentheses if needed. @@ -2777,14 +2911,30 @@ string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expressio return enclose_expression(to_expression(id, register_expression_read)); } +// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. +// need_transpose must be forced to false. +string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) +{ + return unpack_expression_type(to_expression(id), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true); +} + string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) { // If we need to transpose, it will also take care of unpacking rules. auto *e = maybe_get<SPIRExpression>(id); bool need_transpose = e && e->need_transpose; - if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) - return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), - get_extended_decoration(id, SPIRVCrossDecorationPackedType)); + bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + + if (!need_transpose && (is_remapped || is_packed)) + { + return unpack_expression_type(to_expression(id, register_expression_read), + get_pointee_type(expression_type_id(id)), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); + } else return to_expression(id, register_expression_read); } @@ -2794,9 +2944,14 @@ string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_ // If we need to transpose, it will also take care of unpacking rules. auto *e = maybe_get<SPIRExpression>(id); bool need_transpose = e && e->need_transpose; - if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) + bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (!need_transpose && (is_remapped || is_packed)) + { return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), - get_extended_decoration(id, SPIRVCrossDecorationPackedType)); + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); + } else return to_enclosed_expression(id, register_expression_read); } @@ -2831,12 +2986,55 @@ string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_e string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) { auto expr = to_enclosed_expression(id); - if (has_extended_decoration(id, SPIRVCrossDecorationPacked)) + if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked)) return join(expr, "[", index, "]"); else return join(expr, ".", index_to_swizzle(index)); } +string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) +{ + uint32_t size = to_array_size_literal(type); + auto &parent = get<SPIRType>(type.parent_type); + string expr = "{ "; + + for (uint32_t i = 0; i < size; i++) + { + auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); + if (parent.array.empty()) + expr += subexpr; + else + expr += to_rerolled_array_expression(subexpr, parent); + + if (i + 1 < size) + expr += ", "; + } + + expr += " }"; + return expr; +} + +string CompilerGLSL::to_composite_constructor_expression(uint32_t id) +{ + auto &type = expression_type(id); + if (!backend.array_is_value_type && !type.array.empty()) + { + // For this case, we need to "re-roll" an array initializer from a temporary. + // We cannot simply pass the array directly, since it decays to a pointer and it cannot + // participate in a struct initializer. E.g. + // float arr[2] = { 1.0, 2.0 }; + // Foo foo = { arr }; must be transformed to + // Foo foo = { { arr[0], arr[1] } }; + // The array sizes cannot be deduced from specialization constants since we cannot use any loops. + + // We're only triggering one read of the array expression, but this is fine since arrays have to be declared + // as temporaries anyways. + return to_rerolled_array_expression(to_enclosed_expression(id), type); + } + else + return to_unpacked_expression(id); +} + string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) { auto itr = invalid_expressions.find(id); @@ -2874,8 +3072,12 @@ string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) return to_enclosed_expression(e.base_expression) + e.expression; else if (e.need_transpose) { - bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPacked); - return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed); + // This should not be reached for access chains, since we always deal explicitly with transpose state + // when consuming an access chain expression. + uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id, + is_packed); } else { @@ -3675,15 +3877,6 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t if (splat) { res += convert_to_string(c.scalar(vector, 0)); - if (is_legacy()) - { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. - if (c.scalar_i16(vector, 0) < 0) - SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); - } - else - res += backend.uint16_t_literal_suffix; } else { @@ -3693,17 +3886,19 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t res += to_name(c.specialization_constant_id(vector, i)); else { - res += convert_to_string(c.scalar(vector, i)); - if (is_legacy()) + if (*backend.uint16_t_literal_suffix) { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. - if (c.scalar_i16(vector, i) < 0) - SPIRV_CROSS_THROW( - "Tried to convert uint literal into int, but this made the literal negative."); + res += convert_to_string(c.scalar_u16(vector, i)); + res += backend.uint16_t_literal_suffix; } else - res += backend.uint16_t_literal_suffix; + { + // If backend doesn't have a literal suffix, we need to value cast. + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u16(vector, i)); + res += ")"; + } } if (i + 1 < c.vector_size()) @@ -3716,7 +3911,6 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t if (splat) { res += convert_to_string(c.scalar_i16(vector, 0)); - res += backend.int16_t_literal_suffix; } else { @@ -3726,9 +3920,21 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t res += to_name(c.specialization_constant_id(vector, i)); else { - res += convert_to_string(c.scalar_i16(vector, i)); - res += backend.int16_t_literal_suffix; + if (*backend.int16_t_literal_suffix) + { + res += convert_to_string(c.scalar_i16(vector, i)); + res += backend.int16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i16(vector, i)); + res += ")"; + } } + if (i + 1 < c.vector_size()) res += ", "; } @@ -3883,9 +4089,14 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) } } -bool CompilerGLSL::expression_is_forwarded(uint32_t id) +bool CompilerGLSL::expression_is_forwarded(uint32_t id) const +{ + return forwarded_temporaries.count(id) != 0; +} + +bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const { - return forwarded_temporaries.find(id) != end(forwarded_temporaries); + return suppressed_usage_tracking.count(id) != 0; } SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, @@ -3895,8 +4106,9 @@ SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, { // Just forward it without temporary. // If the forward is trivial, we do not force flushing to temporary for this expression. - if (!suppress_usage_tracking) - forwarded_temporaries.insert(result_id); + forwarded_temporaries.insert(result_id); + if (suppress_usage_tracking) + suppressed_usage_tracking.insert(result_id); return set<SPIRExpression>(result_id, rhs, result_type, true); } @@ -3947,8 +4159,18 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_ } void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) + const char *op, bool negate, SPIRType::BaseType expected_type) { + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + SPIRType target_type0 = type0; + SPIRType target_type1 = type1; + target_type0.basetype = expected_type; + target_type1.basetype = expected_type; + target_type0.vecsize = 1; + target_type1.vecsize = 1; + auto &type = get<SPIRType>(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; @@ -3956,11 +4178,25 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. - expr += to_extract_component_expression(op0, i); + if (negate) + expr += "!("; + + if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) + expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i)); + else + expr += to_extract_component_expression(op0, i); + expr += ' '; expr += op; expr += ' '; - expr += to_extract_component_expression(op1, i); + + if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) + expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i)); + else + expr += to_extract_component_expression(op1, i); + + if (negate) + expr += ")"; if (i + 1 < type.vecsize) expr += ", "; @@ -4082,6 +4318,58 @@ void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result inherit_expression_dependencies(result_id, op0); } +// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs +// and different vector sizes all at once. Need a special purpose method here. +void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, + SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType input_type2) +{ + auto &out_type = get<SPIRType>(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type0; + + string cast_op0 = + expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + + // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit. + expected_type.basetype = input_type1; + expected_type.vecsize = 1; + string cast_op1 = expression_type(op1).basetype != input_type1 ? + join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") : + op1_expr; + + expected_type.basetype = input_type2; + expected_type.vecsize = 1; + string cast_op2 = expression_type(op2).basetype != input_type2 ? + join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") : + op2_expr; + + string expr; + if (out_type.basetype != expected_result_type) + { + expected_type.vecsize = out_type.vecsize; + expected_type.basetype = expected_result_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, SPIRType::BaseType input_type) { @@ -4170,6 +4458,44 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result inherit_expression_dependencies(result_id, op3); } +void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op, + SPIRType::BaseType offset_count_type) +{ + // Only need to cast offset/count arguments. Types of base/insert must be same as result type, + // and bitfieldInsert is sign invariant. + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + + auto op0_expr = to_unpacked_expression(op0); + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + auto op3_expr = to_unpacked_expression(op3); + + SPIRType target_type; + target_type.vecsize = 1; + target_type.basetype = offset_count_type; + + if (expression_type(op2).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")"); + } + + if (expression_type(op3).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")"); + } + + emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + // EXT_shader_texture_lod only concerns fragment shaders so lod tex functions // are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex // functions in vertex shaders so we revert those back to plain calls when @@ -4401,7 +4727,7 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, } string mix_op; - bool has_boolean_mix = backend.boolean_mix_support && + bool has_boolean_mix = *backend.boolean_mix_function && ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); @@ -4431,11 +4757,13 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, inherit_expression_dependencies(id, right); inherit_expression_dependencies(id, lerp); } + else if (lerptype.basetype == SPIRType::Boolean) + emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function); else emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); } -string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id) +string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) { // Keep track of the array indices we have used to load the image. // We'll need to use the same array index into the combined image sampler array. @@ -4457,18 +4785,18 @@ string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_ samp_id = samp->self; auto image_itr = find_if(begin(args), end(args), - [image_id](const SPIRFunction::Parameter ¶m) { return param.id == image_id; }); + [image_id](const SPIRFunction::Parameter ¶m) { return image_id == param.id; }); auto sampler_itr = find_if(begin(args), end(args), - [samp_id](const SPIRFunction::Parameter ¶m) { return param.id == samp_id; }); + [samp_id](const SPIRFunction::Parameter ¶m) { return samp_id == param.id; }); if (image_itr != end(args) || sampler_itr != end(args)) { // If any parameter originates from a parameter, we will find it in our argument list. bool global_image = image_itr == end(args); bool global_sampler = sampler_itr == end(args); - uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args)); - uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args)); + VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args))); + VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args))); auto &combined = current_function->combined_parameters; auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { @@ -4509,15 +4837,16 @@ void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i { emit_binary_func_op(result_type, result_id, image_id, samp_id, type_to_glsl(get<SPIRType>(result_type), result_id).c_str()); - - // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. - forwarded_temporaries.erase(result_id); } else { // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); } + + // Make sure to suppress usage tracking and any expression invalidation. + // It is illegal to create temporaries of opaque types. + forwarded_temporaries.erase(result_id); } static inline bool image_opcode_is_sample_no_dref(Op op) @@ -4547,13 +4876,41 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) { auto *ops = stream(i); auto op = static_cast<Op>(i.op); - uint32_t length = i.length; SmallVector<uint32_t> inherited_expressions; - uint32_t result_type = ops[0]; + uint32_t result_type_id = ops[0]; uint32_t id = ops[1]; - uint32_t img = ops[2]; + + bool forward = false; + string expr = to_texture_op(i, &forward, inherited_expressions); + emit_op(result_type_id, id, expr, forward); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); + break; + + default: + break; + } +} + +std::string CompilerGLSL::to_texture_op(const Instruction &i, bool *forward, + SmallVector<uint32_t> &inherited_expressions) +{ + auto *ops = stream(i); + auto op = static_cast<Op>(i.op); + uint32_t length = i.length; + + uint32_t result_type_id = ops[0]; + VariableID img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; @@ -4562,8 +4919,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) bool fetch = false; const uint32_t *opt = nullptr; + auto &result_type = get<SPIRType>(result_type_id); + inherited_expressions.push_back(coord); + // Make sure non-uniform decoration is back-propagated to where it needs to be. + if (has_decoration(img, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(img); + switch (op) { case OpImageSampleDrefImplicitLod: @@ -4658,6 +5021,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) uint32_t offset = 0; uint32_t coffsets = 0; uint32_t sample = 0; + uint32_t minlod = 0; uint32_t flags = 0; if (length) @@ -4683,14 +5047,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) test(offset, ImageOperandsOffsetMask); test(coffsets, ImageOperandsConstOffsetsMask); test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); string expr; - bool forward = false; expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), - (!!grad_x || !!grad_y), !!dref, lod); + (!!grad_x || !!grad_y), !!dref, lod, minlod); expr += "("; expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, - coffset, offset, bias, comp, sample, &forward); + coffset, offset, bias, comp, sample, minlod, forward); expr += ")"; // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. @@ -4703,7 +5067,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) { bool image_is_depth = false; const auto *combined = maybe_get<SPIRCombinedImageSampler>(img); - uint32_t image_id = combined ? combined->image : img; + VariableID image_id = combined ? combined->image : img; if (combined && image_is_comparison(imgtype, combined->image)) image_is_depth = true; @@ -4718,29 +5082,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) image_is_depth = true; if (image_is_depth) - expr = remap_swizzle(get<SPIRType>(result_type), 1, expr); + expr = remap_swizzle(result_type, 1, expr); + } + + if (!backend.support_small_type_sampling_result && result_type.width < 32) + { + // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. + // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. + expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); } // Deals with reads from MSL. We might need to downconvert to fewer components. if (op == OpImageRead) - expr = remap_swizzle(get<SPIRType>(result_type), 4, expr); + expr = remap_swizzle(result_type, 4, expr); - emit_op(result_type, id, expr, forward); - for (auto &inherit : inherited_expressions) - inherit_expression_dependencies(id, inherit); - - switch (op) - { - case OpImageSampleDrefImplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleProjDrefImplicitLod: - register_control_dependent_expression(id); - break; - - default: - break; - } + return expr; } bool CompilerGLSL::expression_is_constant_null(uint32_t id) const @@ -4753,10 +5109,13 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const // Returns the function name for a texture sampling function for the specified image and sampling characteristics. // For some subclasses, the function is a method on the specified image. -string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, +string CompilerGLSL::to_function_name(VariableID tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool, - uint32_t lod) + uint32_t lod, uint32_t minlod) { + if (minlod != 0) + SPIRV_CROSS_THROW("Sparse texturing not yet supported."); + string fname; // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. @@ -4812,10 +5171,19 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) { if (options.vulkan_semantics) { - // Newer glslang supports this extension to deal with texture2D as argument to texture functions. if (dummy_sampler_id) - SPIRV_CROSS_THROW("Vulkan GLSL should not have a dummy sampler for combining."); - require_extension_internal("GL_EXT_samplerless_texture_functions"); + { + // Don't need to consider Shadow state since the dummy sampler is always non-shadow. + auto sampled_type = type; + sampled_type.basetype = SPIRType::SampledImage; + return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ", + to_expression(dummy_sampler_id), ")"); + } + else + { + // Newer glslang supports this extension to deal with texture2D as argument to texture functions. + require_extension_internal("GL_EXT_samplerless_texture_functions"); + } } else { @@ -4832,10 +5200,11 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. -string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, +string CompilerGLSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, - uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward) + uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/, + bool *p_forward) { string farg_str; if (is_fetch) @@ -5112,7 +5481,6 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, case GLSLstd450ModfStruct: { - forced_temporaries.insert(id); auto &type = get<SPIRType>(result_type); emit_uninitialized_temporary_expression(result_type, id); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", @@ -5252,7 +5620,6 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, case GLSLstd450FrexpStruct: { - forced_temporaries.insert(id); auto &type = get<SPIRType>(result_type); emit_uninitialized_temporary_expression(result_type, id); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", @@ -5261,8 +5628,28 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, } case GLSLstd450Ldexp: - emit_binary_func_op(result_type, id, args[0], args[1], "ldexp"); + { + bool forward = should_forward(args[0]) && should_forward(args[1]); + + auto op0 = to_unpacked_expression(args[0]); + auto op1 = to_unpacked_expression(args[1]); + auto &op1_type = expression_type(args[1]); + if (op1_type.basetype != SPIRType::Int) + { + // Need a value cast here. + auto target_type = op1_type; + target_type.basetype = SPIRType::Int; + op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")"); + } + + auto expr = join("ldexp(", op0, ", ", op1, ")"); + + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); break; + } + case GLSLstd450PackSnorm4x8: emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); break; @@ -5326,7 +5713,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, // Bit-fiddling case GLSLstd450FindILsb: - emit_unary_func_op(result_type, id, args[0], "findLSB"); + // findLSB always returns int. + emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type); break; case GLSLstd450FindSMsb: @@ -5808,13 +6196,38 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i // Floating <-> Integer special casts. Just have to enumerate all cases. :( // 16-bit, 32-bit and 64-bit floats. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); return "floatBitsToUint"; + } else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); return "floatBitsToInt"; + } else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); return "uintBitsToFloat"; + } else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); return "intBitsToFloat"; + } + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) return "doubleBitsToInt64"; else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) @@ -6114,6 +6527,43 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInIncomingRayFlagsNV: return "gl_IncomingRayFlagsNV"; + case BuiltInBaryCoordNV: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450."); + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNV"; + } + + case BuiltInBaryCoordNoPerspNV: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450."); + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspNV"; + } + + case BuiltInFragStencilRefEXT: + { + if (!options.es) + { + require_extension_internal("GL_ARB_shader_stencil_export"); + return "gl_FragStencilRefARB"; + } + else + SPIRV_CROSS_THROW("Stencil export not supported in GLES."); + } + + case BuiltInDeviceIndex: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); + require_extension_internal("GL_EXT_device_group"); + return "gl_DeviceIndex"; + default: return join("gl_BuiltIn_", convert_to_string(builtin)); } @@ -6147,7 +6597,16 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; if (!chain_only) + { + // We handle transpose explicitly, so don't resolve that here. + auto *e = maybe_get<SPIRExpression>(base); + bool old_transpose = e && e->need_transpose; + if (e) + e->need_transpose = false; expr = to_enclosed_expression(base, register_expression_read); + if (e) + e->need_transpose = old_transpose; + } // Start traversing type hierarchy at the proper non-pointer types, // but keep type_id referencing the original pointer for use below. @@ -6171,8 +6630,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); - bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPacked); - uint32_t packed_type = get_extended_decoration(base, SPIRVCrossDecorationPackedType); + bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked); + uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID); bool is_invariant = has_decoration(base, DecorationInvariant); bool pending_array_enclose = false; bool dimension_flatten = false; @@ -6344,9 +6803,6 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice BuiltIn builtin; if (is_member_builtin(*type, index, &builtin)) { - // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly. - // To make this properly work by omitting all OpName opcodes, - // we need to infer gl_in or gl_out based on the builtin, and stage. if (access_chain_is_arrayed) { expr += "."; @@ -6368,11 +6824,11 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (has_member_decoration(type->self, index, DecorationInvariant)) is_invariant = true; - is_packed = member_is_packed_type(*type, index); - if (is_packed) - packed_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPackedType); + is_packed = member_is_packed_physical_type(*type, index); + if (member_is_remapped_physical_type(*type, index)) + physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID); else - packed_type = 0; + physical_type = 0; row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); type = &get<SPIRType>(type->member_types[index]); @@ -6380,13 +6836,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // Matrix -> Vector else if (type->columns > 1) { - if (row_major_matrix_needs_conversion) - { - expr = convert_row_major_matrix(expr, *type, is_packed); - row_major_matrix_needs_conversion = false; - is_packed = false; - packed_type = 0; - } + // If we have a row-major matrix here, we need to defer any transpose in case this access chain + // is used to store a column. We can resolve it right here and now if we access a scalar directly, + // by flipping indexing order of the matrix. expr += "["; if (index_is_literal) @@ -6401,16 +6853,36 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // Vector -> Scalar else if (type->vecsize > 1) { - if (index_is_literal && !is_packed) + string deferred_index; + if (row_major_matrix_needs_conversion) + { + // Flip indexing order. + auto column_index = expr.find_last_of('['); + if (column_index != string::npos) + { + deferred_index = expr.substr(column_index); + expr.resize(column_index); + } + } + + if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion) { expr += "."; expr += index_to_swizzle(index); } - else if (ir.ids[index].get_type() == TypeConstant && !is_packed) + else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) { auto &c = get<SPIRConstant>(index); - expr += "."; - expr += index_to_swizzle(c.scalar()); + if (c.specialization) + { + // If the index is a spec constant, we cannot turn extract into a swizzle. + expr += join("[", to_expression(index), "]"); + } + else + { + expr += "."; + expr += index_to_swizzle(c.scalar()); + } } else if (index_is_literal) { @@ -6424,8 +6896,11 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice expr += "]"; } + expr += deferred_index; + row_major_matrix_needs_conversion = false; + is_packed = false; - packed_type = 0; + physical_type = 0; type_id = type->parent_type; type = &get<SPIRType>(type_id); } @@ -6445,7 +6920,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice meta->need_transpose = row_major_matrix_needs_conversion; meta->storage_is_packed = is_packed; meta->storage_is_invariant = is_invariant; - meta->storage_packed_type = packed_type; + meta->storage_physical_type = physical_type; } return expr; @@ -6587,7 +7062,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin // Cannot forward transpositions, so resolve them here. if (need_transpose) - expr += convert_row_major_matrix(tmp, member_type, false); + expr += convert_row_major_matrix(tmp, member_type, 0, false); else expr += tmp; } @@ -6903,7 +7378,7 @@ bool CompilerGLSL::should_dereference(uint32_t id) return true; } -bool CompilerGLSL::should_forward(uint32_t id) +bool CompilerGLSL::should_forward(uint32_t id) const { // If id is a variable we will try to forward it regardless of force_temporary check below // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL @@ -6922,6 +7397,12 @@ bool CompilerGLSL::should_forward(uint32_t id) return false; } +bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const +{ + // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion. + return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id); +} + void CompilerGLSL::track_expression_read(uint32_t id) { switch (ir.ids[id].get_type()) @@ -6948,7 +7429,7 @@ void CompilerGLSL::track_expression_read(uint32_t id) // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. - if (expression_is_forwarded(id)) + if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id)) { auto &v = expression_usage_counts[id]; v++; @@ -7019,18 +7500,23 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) return expr; } +void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) +{ + if (var.allocate_temporary_copy) + { + auto &type = get<SPIRType>(var.basetype); + auto &flags = get_decoration_bitset(var.self); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + } +} + void CompilerGLSL::flush_variable_declaration(uint32_t id) { auto *var = maybe_get<SPIRVariable>(id); if (var && var->deferred_declaration) { statement(variable_decl_function_local(*var), ";"); - if (var->allocate_temporary_copy) - { - auto &type = get<SPIRType>(var->basetype); - auto &flags = ir.meta[id].decoration.decoration_flags; - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), ";"); - } + emit_variable_temporary_copies(*var); var->deferred_declaration = false; } } @@ -7140,7 +7626,7 @@ bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) { - uint32_t base = 0; + ID base = 0; string op; string subop; @@ -7198,10 +7684,10 @@ string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32 if (i) op += ", "; - subop = to_expression(elems[i]); + subop = to_composite_constructor_expression(elems[i]); } - base = e ? e->base_expression : 0; + base = e ? e->base_expression : ID(0); } if (swizzle_optimization) @@ -7285,14 +7771,19 @@ void CompilerGLSL::emit_block_instructions(SPIRBlock &block) void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) { - if (forwarded_temporaries.count(expr.self)) + // Allow trivially forwarded expressions like OpLoad or trivial shuffles, + // these will be marked as having suppressed usage tracking. + // Our only concern is to make sure arithmetic operations are done in similar ways. + if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) && + forced_invariant_temporaries.count(expr.self) == 0) { forced_temporaries.insert(expr.self); + forced_invariant_temporaries.insert(expr.self); force_recompile(); - } - for (auto &dependent : expr.expression_dependencies) - disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent)); + for (auto &dependent : expr.expression_dependencies) + disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent)); + } } void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) @@ -7357,6 +7848,10 @@ uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &inst case OpSLessThanEqual: case OpSGreaterThan: case OpSGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: return expression_type(ops[2]).width; default: @@ -7442,19 +7937,39 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) bool old_need_transpose = false; auto *ptr_expression = maybe_get<SPIRExpression>(ptr); - if (ptr_expression && ptr_expression->need_transpose) + + if (forward) { - old_need_transpose = true; - ptr_expression->need_transpose = false; - need_transpose = true; + // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while + // taking the expression. + if (ptr_expression && ptr_expression->need_transpose) + { + old_need_transpose = true; + ptr_expression->need_transpose = false; + need_transpose = true; + } + else if (is_non_native_row_major_matrix(ptr)) + need_transpose = true; } - else if (is_non_native_row_major_matrix(ptr)) - need_transpose = true; // If we are forwarding this load, // don't register the read to access chain here, defer that to when we actually use the expression, // using the add_implied_read_expression mechanism. - auto expr = to_dereferenced_expression(ptr, !forward); + string expr; + + bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked); + bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID); + if (forward || (!is_packed && !is_remapped)) + { + // For the simple case, we do not need to deal with repacking. + expr = to_dereferenced_expression(ptr, false); + } + else + { + // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before + // storing the expression to a temporary. + expr = to_unpacked_expression(ptr); + } // We might need to bitcast in order to load from a builtin. bitcast_from_builtin_load(ptr, expr, get<SPIRType>(result_type)); @@ -7465,10 +7980,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) unroll_array_from_complex_load(id, ptr, expr); auto &type = get<SPIRType>(result_type); - if (has_decoration(id, DecorationNonUniformEXT)) + // Shouldn't need to check for ID, but current glslang codegen requires it in some cases + // when loading Image/Sampler descriptors. It does not hurt to check ID as well. + if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT)) + { + propagate_nonuniform_qualifier(ptr); convert_non_uniform_expression(type, expr); + } - if (ptr_expression) + if (forward && ptr_expression) ptr_expression->need_transpose = old_need_transpose; // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. @@ -7484,7 +8004,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // it is an array, and our backend does not support arrays as value types. // Emit the temporary, and copy it explicitly. e = &emit_uninitialized_temporary_expression(result_type, id); - emit_array_copy(to_expression(id), ptr); + emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_backing_variable_storage(ptr)); } else e = &emit_op(result_type, id, expr, forward, !usage_tracking); @@ -7492,12 +8012,22 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) e->need_transpose = need_transpose; register_read(id, ptr, forward); - // Pass through whether the result is of a packed type. - if (has_extended_decoration(ptr, SPIRVCrossDecorationPacked)) + if (forward) { - set_extended_decoration(id, SPIRVCrossDecorationPacked); - set_extended_decoration(id, SPIRVCrossDecorationPackedType, - get_extended_decoration(ptr, SPIRVCrossDecorationPackedType)); + // Pass through whether the result is of a packed type and the physical type ID. + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked)) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)) + { + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, + get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)); + } + } + else + { + // This might have been set on an earlier compilation iteration, force it to be unset. + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); } inherit_expression_dependencies(id, ptr); @@ -7523,24 +8053,36 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2])); auto *backing_variable = maybe_get_backing_variable(ops[2]); - expr.loaded_from = backing_variable ? backing_variable->self : ops[2]; + expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); expr.need_transpose = meta.need_transpose; expr.access_chain = true; // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. if (meta.storage_is_packed) - set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(ops[1], DecorationInvariant); + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. + // Need to assume we're forwarded while calling inherit_expression_depdendencies. + forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); + for (uint32_t i = 2; i < length; i++) { inherit_expression_dependencies(ops[1], ops[i]); add_implied_read_expression(expr, ops[i]); } + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); + break; } @@ -7548,6 +8090,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto *var = maybe_get<SPIRVariable>(ops[0]); + if (has_decoration(ops[0], DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(ops[0]); + if (var && var->statically_assigned) var->static_expression = ops[1]; else if (var && var->loop_variable && !var->loop_variable_enable) @@ -7637,13 +8182,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (skip_argument(arg[i])) continue; - arglist.push_back(to_func_call_arg(arg[i])); + arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i])); } for (auto &combined : callee.combined_parameters) { - uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; - uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id]; + auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); + auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); } @@ -7746,15 +8291,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) forward = false; string constructor_op; - if (!backend.array_is_value_type && out_type.array.size() > 1) - { - // We cannot construct array of arrays because we cannot treat the inputs - // as value types. Need to declare the array-of-arrays, and copy in elements one by one. - emit_uninitialized_temporary_expression(result_type, id); - for (uint32_t i = 0; i < length; i++) - emit_array_copy(join(to_expression(id), "[", i, "]"), elems[i]); - } - else if (backend.use_initializer_list && composite) + if (backend.use_initializer_list && composite) { // Only use this path if we are building composites. // This path cannot be used for arithmetic. @@ -7764,14 +8301,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (type_is_empty(out_type) && !backend.supports_empty_struct) constructor_op += "0"; else if (splat) - constructor_op += to_expression(elems[0]); + constructor_op += to_unpacked_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += " }"; } else if (swizzle_splat && !composite) { - constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_expression(elems[0])); + constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0])); } else { @@ -7779,7 +8316,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (type_is_empty(out_type) && !backend.supports_empty_struct) constructor_op += "0"; else if (splat) - constructor_op += to_expression(elems[0]); + constructor_op += to_unpacked_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += ")"; @@ -7841,7 +8378,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) allow_base_expression = false; // Packed expressions cannot be split up. - if (has_extended_decoration(ops[2], SPIRVCrossDecorationPacked)) + if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked)) + allow_base_expression = false; + + // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern + // into the base expression. + if (is_non_native_row_major_matrix(ops[2])) allow_base_expression = false; AccessChainMeta meta; @@ -7864,14 +8406,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // from expression causing it to be forced to an actual temporary in GLSL. auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta); - e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); + e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); e->base_expression = ops[2]; } else { auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); - e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); + e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); } @@ -7880,9 +8422,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // instead of loading everything through an access chain. e->need_transpose = meta.need_transpose; if (meta.storage_is_packed) - set_extended_decoration(id, SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(id, SPIRVCrossDecorationPackedType, meta.storage_packed_type); + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(id, DecorationInvariant); @@ -7930,13 +8472,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t rhs = ops[2]; bool pointer = get<SPIRType>(result_type).pointer; - if (expression_is_lvalue(rhs) && !pointer) + auto *chain = maybe_get<SPIRAccessChain>(rhs); + if (chain) + { + // Cannot lower to a SPIRExpression, just copy the object. + auto &e = set<SPIRAccessChain>(id, *chain); + e.self = id; + } + else if (expression_is_lvalue(rhs) && !pointer) { // Need a copy. // For pointer types, we copy the pointer itself. - statement(declare_temporary(result_type, id), to_expression(rhs), ";"); + statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";"); set<SPIRExpression>(id, to_name(id), result_type, true); - inherit_expression_dependencies(id, rhs); } else { @@ -7947,7 +8495,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (pointer) { auto *var = maybe_get_backing_variable(rhs); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? var->self : ID(0); + } + + // If we're copying an access chain, need to inherit the read expressions. + auto *rhs_expr = maybe_get<SPIRExpression>(rhs); + if (rhs_expr) + { + e.implied_read_expressions = rhs_expr->implied_read_expressions; + e.expression_dependencies = rhs_expr->expression_dependencies; } } break; @@ -7972,7 +8528,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) shuffle = true; // Cannot use swizzles with packed expressions, force shuffle path. - if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPacked)) + if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked)) shuffle = true; string expr; @@ -7981,7 +8537,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (shuffle) { should_fwd = should_forward(vec0) && should_forward(vec1); - trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1); + trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1); // Constructor style and shuffling from two different vectors. SmallVector<string> args; @@ -7994,7 +8550,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // a value we might not need, and bog down codegen. SPIRConstant c; c.constant_type = type0.parent_type; - assert(type0.parent_type != 0); + assert(type0.parent_type != ID(0)); args.push_back(constant_expression(c)); } else if (elems[i] >= type0.vecsize) @@ -8007,7 +8563,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) else { should_fwd = should_forward(vec0); - trivial_forward = !expression_is_forwarded(vec0); + trivial_forward = should_suppress_usage_tracking(vec0); // We only source from first vector, so can use swizzle. // If the vector is packed, unpack it before applying a swizzle (needed for MSL) @@ -8027,8 +8583,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. emit_op(result_type, id, expr, should_fwd, trivial_forward); + inherit_expression_dependencies(id, vec0); - inherit_expression_dependencies(id, vec1); + if (vec0 != vec1) + inherit_expression_dependencies(id, vec1); break; } @@ -8084,18 +8642,56 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (e && e->need_transpose) { e->need_transpose = false; - emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); + string expr; + + if (opcode == OpMatrixTimesVector) + expr = join(to_enclosed_unpacked_expression(ops[3]), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + else + expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + to_enclosed_unpacked_expression(ops[2])); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); } else GLSL_BOP(*); break; } + case OpMatrixTimesMatrix: + { + auto *a = maybe_get<SPIRExpression>(ops[2]); + auto *b = maybe_get<SPIRExpression>(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. + if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + + break; + } + case OpFMul: case OpMatrixTimesScalar: case OpVectorTimesScalar: - case OpMatrixTimesMatrix: GLSL_BOP(*); break; @@ -8170,7 +8766,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get<SPIRType>(result_type); emit_uninitialized_temporary_expression(result_type, result_id); const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; @@ -8279,7 +8874,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &type = get<SPIRType>(result_type); if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown); else GLSL_BOP(||); break; @@ -8293,7 +8888,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &type = get<SPIRType>(result_type); if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown); else GLSL_BOP(&&); break; @@ -8350,7 +8945,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpUGreaterThan: case OpSGreaterThan: { - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThan, type); else @@ -8370,7 +8965,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpUGreaterThanEqual: case OpSGreaterThanEqual: { - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThanEqual, type); else @@ -8390,7 +8985,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpULessThan: case OpSLessThan: { - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThan, type); else @@ -8410,7 +9005,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpULessThanEqual: case OpSLessThanEqual: { - auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThanEqual, type); else @@ -8618,23 +9213,36 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Bitfield case OpBitFieldInsert: - // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. - GLSL_QFOP(bitfieldInsert); + { + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int); break; + } case OpBitFieldSExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type, + SPIRType::Int, SPIRType::Int); + break; + } + case OpBitFieldUExtract: - // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. - GLSL_TFOP(bitfieldExtract); + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type, + SPIRType::Int, SPIRType::Int); break; + } case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. GLSL_UFOP(bitfieldReverse); break; case OpBitCount: - GLSL_UFOP(bitCount); + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type); break; + } // Atomics case OpAtomicExchange: @@ -8823,7 +9431,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // When using the image, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? var->self : ID(0); break; } @@ -8883,6 +9491,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_sampled_image_op(result_type, id, ops[2], ops[3]); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); break; } @@ -9044,7 +9654,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? var->self : ID(0); break; } @@ -9304,6 +9914,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } + else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info) + { + break; // Ignore SPIR-V debug information extended instructions. + } else { statement("// unimplemented ext op ", instruction.op); @@ -9495,28 +10109,98 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; case OpFUnordEqual: - GLSL_BFOP(unsupported_FUnordEqual); - break; - case OpFUnordNotEqual: - GLSL_BFOP(unsupported_FUnordNotEqual); - break; - case OpFUnordLessThan: - GLSL_BFOP(unsupported_FUnordLessThan); - break; - case OpFUnordGreaterThan: - GLSL_BFOP(unsupported_FUnordGreaterThan); - break; - case OpFUnordLessThanEqual: - GLSL_BFOP(unsupported_FUnordLessThanEqual); - break; - case OpFUnordGreaterThanEqual: - GLSL_BFOP(unsupported_FUnordGreaterThanEqual); + { + // GLSL doesn't specify if floating point comparisons are ordered or unordered, + // but glslang always emits ordered floating point compares for GLSL. + // To get unordered compares, we can test the opposite thing and invert the result. + // This way, we force true when there is any NaN present. + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + string expr; + if (expression_type(op0).vecsize > 1) + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = "notEqual"; + break; + + case OpFUnordNotEqual: + comp_op = "equal"; + break; + + case OpFUnordLessThan: + comp_op = "greaterThanEqual"; + break; + + case OpFUnordLessThanEqual: + comp_op = "greaterThan"; + break; + + case OpFUnordGreaterThan: + comp_op = "lessThanEqual"; + break; + + case OpFUnordGreaterThanEqual: + comp_op = "lessThan"; + break; + + default: + assert(0); + break; + } + + expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))"); + } + else + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = " != "; + break; + + case OpFUnordNotEqual: + comp_op = " == "; + break; + + case OpFUnordLessThan: + comp_op = " >= "; + break; + + case OpFUnordLessThanEqual: + comp_op = " > "; + break; + + case OpFUnordGreaterThan: + comp_op = " <= "; + break; + + case OpFUnordGreaterThanEqual: + comp_op = " < "; + break; + + default: + assert(0); + break; + } + + expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")"); + } + + emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(ops[1], op0); + inherit_expression_dependencies(ops[1], op1); break; + } case OpReportIntersectionNV: statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); @@ -9564,6 +10248,57 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Undefined value has been declared. break; + case OpLine: + { + emit_line_directive(ops[0], ops[1]); + break; + } + + case OpNoLine: + break; + + case OpDemoteToHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + statement(backend.demote_literal, ";"); + break; + + case OpIsHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + emit_op(ops[0], ops[1], "helperInvocationEXT()", false); + break; + + case OpBeginInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + + case OpEndInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + default: statement("// unimplemented op ", instruction.op); break; @@ -9593,12 +10328,18 @@ void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t in if (var_id) flush_variable_declaration(var_id); - arglist.push_back(to_func_call_arg(arg.id)); + arglist.push_back(to_func_call_arg(arg, arg.id)); } } string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) { + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + return to_member_name(get<SPIRType>(type.type_alias), index); + } + auto &memb = ir.meta[type.self].members; if (index < memb.size() && !memb[index].alias.empty()) return memb[index].alias; @@ -9674,21 +10415,50 @@ bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, u return true; } +// Checks if we need to remap physical type IDs when declaring the type in a buffer. +bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); +} + // Checks whether the member is in packed data type, that might need to be unpacked. -// GLSL does not define packed data types, but certain subclasses do. -bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const +bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const { - return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked); + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked); } // Wraps the expression string in a function call that converts the // row_major matrix result of the expression to a column_major matrix. // Base implementation uses the standard library transpose() function. // Subclasses may override to use a different function. -string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/) +string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, + bool /*is_packed*/) { strip_enclosed_expression(exp_str); - return join("transpose(", exp_str, ")"); + if (!is_matrix(exp_type)) + { + auto column_index = exp_str.find_last_of('['); + if (column_index == string::npos) + return exp_str; + + auto column_expr = exp_str.substr(column_index); + exp_str.resize(column_index); + + auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; + + // Loading a column from a row-major matrix. Unroll the load. + for (uint32_t c = 0; c < exp_type.vecsize; c++) + { + transposed_expr += join(exp_str, '[', c, ']', column_expr); + if (c + 1 < exp_type.vecsize) + transposed_expr += ", "; + } + + transposed_expr += ")"; + return transposed_expr; + } + else + return join("transpose(", exp_str, ")"); } string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) @@ -9721,6 +10491,10 @@ void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type variable_decl(membertype, to_member_name(type, index)), ";"); } +void CompilerGLSL::emit_struct_padding_target(const SPIRType &) +{ +} + const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) { // GL_EXT_buffer_reference variables can be marked as restrict. @@ -9779,7 +10553,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) { - return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags); + auto &type = expression_type(id); + bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es; + if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + { + // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. + auto &result_type = get<SPIRType>(type.image.type); + if (result_type.width < 32) + return "mediump "; + } + return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } string CompilerGLSL::to_qualifiers_glsl(uint32_t id) @@ -9995,15 +10778,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) switch (imagetype.basetype) { case SPIRType::Int: + case SPIRType::Short: + case SPIRType::SByte: res = "i"; break; case SPIRType::UInt: + case SPIRType::UShort: + case SPIRType::UByte: res = "u"; break; default: break; } + // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. + // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) return res + "subpassInput" + (type.image.ms ? "MS" : ""); @@ -10312,7 +11102,7 @@ void CompilerGLSL::require_extension_internal(const string &ext) } } -void CompilerGLSL::flatten_buffer_block(uint32_t id) +void CompilerGLSL::flatten_buffer_block(VariableID id) { auto &var = get<SPIRVariable>(id); auto &type = get<SPIRType>(var.basetype); @@ -10428,7 +11218,13 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret if (func.self == ir.default_entry_point) { - decl += "main"; + // If we need complex fallback in GLSL, we just wrap main() in a function + // and interlock the entire shader ... + if (interlocked_is_complex) + decl += "spvMainInterlockedBody"; + else + decl += "main"; + processing_entry_point = true; } else @@ -10503,6 +11299,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) } } + if (func.entry_line.file_id != 0) + emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); emit_function_prototype(func, return_flags); begin_scope(); @@ -10523,6 +11321,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) for (auto &v : func.local_variables) { auto &var = get<SPIRVariable>(v); + var.deferred_declaration = false; + if (var.storage == StorageClassWorkgroup) { // Special variable type which cannot have initializer, @@ -10582,15 +11382,29 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) var.deferred_declaration = false; } + // Enforce declaration order for regression testing purposes. + for (auto &block_id : func.blocks) + { + auto &block = get<SPIRBlock>(block_id); + sort(begin(block.dominated_variables), end(block.dominated_variables)); + } + for (auto &line : current_function->fixup_hooks_in) line(); - entry_block.loop_dominator = SPIRBlock::NoDominator; emit_block_chain(entry_block); end_scope(); processing_entry_point = false; statement(""); + + // Make sure deferred declaration state for local variables is cleared when we are done with function. + // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. + for (auto &v : func.local_variables) + { + auto &var = get<SPIRVariable>(v); + var.deferred_declaration = false; + } } void CompilerGLSL::emit_fixup() @@ -10609,18 +11423,11 @@ void CompilerGLSL::emit_fixup() } } -bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to) -{ - auto &child = get<SPIRBlock>(to); - for (auto &phi : child.phi_variables) - if (phi.parent == from) - return true; - return false; -} - -void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) +void CompilerGLSL::flush_phi(BlockID from, BlockID to) { auto &child = get<SPIRBlock>(to); + if (child.ignore_phi_from_block == from) + return; unordered_set<uint32_t> temporary_phi_variables; @@ -10645,7 +11452,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. bool need_saved_temporary = find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { - return future_phi.local_variable == phi.function_variable && future_phi.parent == from; + return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; }) != end(child.phi_variables); if (need_saved_temporary) @@ -10680,7 +11487,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) } } -void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) +void CompilerGLSL::branch_to_continue(BlockID from, BlockID to) { auto &to_block = get<SPIRBlock>(to); if (from == to) @@ -10691,16 +11498,11 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) { // Just emit the whole block chain as is. auto usage_counts = expression_usage_counts; - auto invalid = invalid_expressions; emit_block_chain(to_block); - // Expression usage counts and invalid expressions - // are moot after returning from the continue block. - // Since we emit the same block multiple times, - // we don't want to invalidate ourselves. + // Expression usage counts are moot after returning from the continue block. expression_usage_counts = usage_counts; - invalid_expressions = invalid; } else { @@ -10715,23 +11517,23 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) // so just use "self" here. loop_dominator = from; } - else if (from_block.loop_dominator != SPIRBlock::NoDominator) + else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { loop_dominator = from_block.loop_dominator; } if (loop_dominator != 0) { - auto &dominator = get<SPIRBlock>(loop_dominator); + auto &cfg = get_cfg_for_current_function(); // For non-complex continue blocks, we implicitly branch to the continue block // by having the continue block be part of the loop header in for (; ; continue-block). - outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); + outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from); } // Some simplification for for-loops. We always end up with a useless continue; // statement since we branch to a loop block. - // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block, + // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block, // we can avoid writing out an explicit continue statement. // Similar optimization to return statements if we know we're outside flow control. if (!outside_control_flow) @@ -10739,11 +11541,12 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) } } -void CompilerGLSL::branch(uint32_t from, uint32_t to) +void CompilerGLSL::branch(BlockID from, BlockID to) { flush_phi(from, to); flush_control_dependent_expressions(from); - flush_all_active_variables(); + + bool to_is_continue = is_continue(to); // This is only a continue if we branch to our loop dominator. if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to) @@ -10760,7 +11563,8 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, // write to the ladder here, and defer the break. // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. - if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u && + if (current_emitting_switch && is_loop_break(to) && + current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) && get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to) { if (!current_emitting_switch->need_ladder_break) @@ -10773,12 +11577,25 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) } statement("break;"); } - else if (is_continue(to) || (from == to)) + else if (to_is_continue || from == to) { // For from == to case can happen for a do-while loop which branches into itself. // We don't mark these cases as continue blocks, but the only possible way to branch into // ourselves is through means of continue blocks. - branch_to_continue(from, to); + + // If we are merging to a continue block, there is no need to emit the block chain for continue here. + // We can branch to the continue block after we merge execution. + + // Here we make use of structured control flow rules from spec: + // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block + // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG + // If we are branching to a merge block, we must be inside a construct which dominates the merge block. + auto &block_meta = ir.block_meta[to]; + bool branching_to_merge = + (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT | + ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0; + if (!to_is_continue || !branching_to_merge) + branch_to_continue(from, to); } else if (!is_conditional(to)) emit_block_chain(get<SPIRBlock>(to)); @@ -10789,12 +11606,19 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) // Inner scope always takes precedence. } -void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block) +void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block) { - // If we branch directly to a selection merge target, we don't really need a code path. + auto &from_block = get<SPIRBlock>(from); + BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0); + + // If we branch directly to a selection merge target, we don't need a code path. + // This covers both merge out of if () / else () as well as a break for switch blocks. bool true_sub = !is_conditional(true_block); bool false_sub = !is_conditional(false_block); + bool true_block_is_selection_merge = true_block == merge_block; + bool false_block_is_selection_merge = false_block == merge_block; + if (true_sub) { emit_block_hints(get<SPIRBlock>(from)); @@ -10803,7 +11627,11 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin branch(from, true_block); end_scope(); - if (false_sub || is_continue(false_block) || is_break(false_block)) + // If we merge to continue, we handle that explicitly in emit_block_chain(), + // so there is no need to branch to it directly here. + // break; is required to handle ladder fallthrough cases, so keep that in for now, even + // if we could potentially handle it in emit_block_chain(). + if (false_sub || (!false_block_is_selection_merge && is_continue(false_block)) || is_break(false_block)) { statement("else"); begin_scope(); @@ -10818,7 +11646,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin end_scope(); } } - else if (false_sub && !true_sub) + else if (false_sub) { // Only need false path, use negative conditional. emit_block_hints(get<SPIRBlock>(from)); @@ -10827,7 +11655,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin branch(from, false_block); end_scope(); - if (is_continue(true_block) || is_break(true_block)) + if ((!true_block_is_selection_merge && is_continue(true_block)) || is_break(true_block)) { statement("else"); begin_scope(); @@ -10844,44 +11672,6 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin } } -void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block) -{ - // Propagate down the loop dominator block, so that dominated blocks can back trace. - if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator) - { - uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator; - - auto set_dominator = [this](uint32_t self, uint32_t new_dominator) { - auto &dominated_block = this->get<SPIRBlock>(self); - - // If we already have a loop dominator, we're trying to break out to merge targets - // which should not update the loop dominator. - if (!dominated_block.loop_dominator) - dominated_block.loop_dominator = new_dominator; - }; - - // After merging a loop, we inherit the loop dominator always. - if (block.merge_block) - set_dominator(block.merge_block, block.loop_dominator); - - if (block.true_block) - set_dominator(block.true_block, dominator); - if (block.false_block) - set_dominator(block.false_block, dominator); - if (block.next_block) - set_dominator(block.next_block, dominator); - if (block.default_block) - set_dominator(block.default_block, dominator); - - for (auto &c : block.cases) - set_dominator(c.block, dominator); - - // In older glslang output continue_block can be == loop header. - if (block.continue_block && block.continue_block != block.self) - set_dominator(block.continue_block, dominator); - } -} - // FIXME: This currently cannot handle complex continue blocks // as in do-while. // This should be seen as a "trivial" continue block. @@ -10902,7 +11692,6 @@ string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_tr // Stamp out all blocks one after each other. while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) { - propagate_loop_dominators(*block); // Write out all instructions we have in this block. emit_block_instructions(*block); @@ -11114,7 +11903,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method } default: - SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; } begin_scope(); @@ -11146,7 +11938,6 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method if (current_count == statement_count && condition_is_temporary) { - propagate_loop_dominators(child); uint32_t target_block = child.true_block; switch (continue_type) @@ -11189,7 +11980,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method } default: - SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; } begin_scope(); @@ -11210,18 +12004,16 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) { - // Enforce declaration order for regression testing purposes. - sort(begin(block.dominated_variables), end(block.dominated_variables)); for (auto &v : block.dominated_variables) flush_variable_declaration(v); } -void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<uint32_t, uint32_t>> &temporaries) +void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries) { // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. // Need to sort these to ensure that reference output is stable. sort(begin(temporaries), end(temporaries), - [](const pair<uint32_t, uint32_t> &a, const pair<uint32_t, uint32_t> &b) { return a.second < b.second; }); + [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; }); for (auto &tmp : temporaries) { @@ -11240,8 +12032,6 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<uint32_t, uint32_t> void CompilerGLSL::emit_block_chain(SPIRBlock &block) { - propagate_loop_dominators(block); - bool select_branch_to_true_block = false; bool select_branch_to_false_block = false; bool skip_direct_branch = false; @@ -11255,8 +12045,22 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) continue_type = continue_block_type(get<SPIRBlock>(block.continue_block)); // If we have loop variables, stop masking out access to the variable now. - for (auto var : block.loop_variables) - get<SPIRVariable>(var).loop_variable_enable = true; + for (auto var_id : block.loop_variables) + { + auto &var = get<SPIRVariable>(var_id); + var.loop_variable_enable = true; + // We're not going to declare the variable directly, so emit a copy here. + emit_variable_temporary_copies(var); + } + + // Remember deferred declaration state. We will restore it before returning. + SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var_id = block.dominated_variables[i]; + auto &var = get<SPIRVariable>(var_id); + rearm_dominated_variables[i] = var.deferred_declaration; + } // This is the method often used by spirv-opt to implement loops. // The loop header goes straight into the continue block. @@ -11416,7 +12220,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) case SPIRBlock::MultiSelect: { auto &type = expression_type(block.condition); - bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort; + bool unsigned_case = + type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte; if (block.merge == SPIRBlock::MergeNone) SPIRV_CROSS_THROW("Switch statement is not structured"); @@ -11441,61 +12246,182 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (block.need_ladder_break) statement("bool _", block.self, "_ladder_break = false;"); + // Find all unique case constructs. + unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs; + SmallVector<uint32_t> block_declaration_order; + SmallVector<uint32_t> literals_to_merge; + + // If a switch case branches to the default block for some reason, we can just remove that literal from consideration + // and let the default: block handle it. + // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. + // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. + for (auto &c : block.cases) + { + if (c.block != block.next_block && c.block != block.default_block) + { + if (!case_constructs.count(c.block)) + block_declaration_order.push_back(c.block); + case_constructs[c.block].push_back(c.value); + } + else if (c.block == block.next_block && block.default_block != block.next_block) + { + // We might have to flush phi inside specific case labels. + // If we can piggyback on default:, do so instead. + literals_to_merge.push_back(c.value); + } + } + + // Empty literal array -> default. + if (block.default_block != block.next_block) + { + auto &default_block = get<SPIRBlock>(block.default_block); + + // We need to slide in the default block somewhere in this chain + // if there are fall-through scenarios since the default is declared separately in OpSwitch. + // Only consider trivial fall-through cases here. + size_t num_blocks = block_declaration_order.size(); + bool injected_block = false; + + for (size_t i = 0; i < num_blocks; i++) + { + auto &case_block = get<SPIRBlock>(block_declaration_order[i]); + if (execution_is_direct_branch(case_block, default_block)) + { + // Fallthrough to default block, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); + injected_block = true; + break; + } + else if (execution_is_direct_branch(default_block, case_block)) + { + // Default case is falling through to another case label, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); + injected_block = true; + break; + } + } + + // Order does not matter. + if (!injected_block) + block_declaration_order.push_back(block.default_block); + + case_constructs[block.default_block] = {}; + } + + size_t num_blocks = block_declaration_order.size(); + + const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string { + return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal)); + }; + + // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, + // we need to flush phi nodes outside the switch block in a branch, + // and skip any Phi handling inside the case label to make fall-through work as expected. + // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this + // inside the case label if at all possible. + for (size_t i = 1; i < num_blocks; i++) + { + if (flush_phi_required(block.self, block_declaration_order[i]) && + flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) + { + uint32_t target_block = block_declaration_order[i]; + + // Make sure we flush Phi, it might have been marked to be ignored earlier. + get<SPIRBlock>(target_block).ignore_phi_from_block = 0; + + auto &literals = case_constructs[target_block]; + + if (literals.empty()) + { + // Oh boy, gotta make a complete negative test instead! o.o + // Find all possible literals that would *not* make us enter the default block. + // If none of those literals match, we flush Phi ... + SmallVector<string> conditions; + for (size_t j = 0; j < num_blocks; j++) + { + auto &negative_literals = case_constructs[block_declaration_order[j]]; + for (auto &case_label : negative_literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " != ", to_case_label(case_label, unsigned_case))); + } + + statement("if (", merge(conditions, " && "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } + else + { + SmallVector<string> conditions; + conditions.reserve(literals.size()); + for (auto &case_label : literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " == ", to_case_label(case_label, unsigned_case))); + statement("if (", merge(conditions, " || "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } + + // Mark the block so that we don't flush Phi from header to case label. + get<SPIRBlock>(target_block).ignore_phi_from_block = block.self; + } + } + emit_block_hints(block); statement("switch (", to_expression(block.condition), ")"); begin_scope(); - // Multiple case labels can branch to same block, so find all unique blocks. - bool emitted_default = false; - unordered_set<uint32_t> emitted_blocks; - - for (auto &c : block.cases) + for (size_t i = 0; i < num_blocks; i++) { - if (emitted_blocks.count(c.block) != 0) - continue; + uint32_t target_block = block_declaration_order[i]; + auto &literals = case_constructs[target_block]; - // Emit all case labels which branch to our target. - // FIXME: O(n^2), revisit if we hit shaders with 100++ case labels ... - for (auto &other_case : block.cases) + if (literals.empty()) { - if (other_case.block == c.block) + // Default case. + statement("default:"); + } + else + { + for (auto &case_literal : literals) { // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. - auto case_value = unsigned_case ? convert_to_string(uint32_t(other_case.value)) : - convert_to_string(int32_t(other_case.value)); - statement("case ", case_value, label_suffix, ":"); + statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); } } - // Maybe we share with default block? - if (block.default_block == c.block) + auto &case_block = get<SPIRBlock>(target_block); + if (backend.support_case_fallthrough && i + 1 < num_blocks && + execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1]))) { - statement("default:"); - emitted_default = true; + // We will fall through here, so just terminate the block chain early. + // We still need to deal with Phi potentially. + // No need for a stack-like thing here since we only do fall-through when there is a + // single trivial branch to fall-through target.. + current_emitting_switch_fallthrough = true; } - - // Complete the target. - emitted_blocks.insert(c.block); + else + current_emitting_switch_fallthrough = false; begin_scope(); - branch(block.self, c.block); + branch(block.self, target_block); end_scope(); + + current_emitting_switch_fallthrough = false; } - if (!emitted_default) + // Might still have to flush phi variables if we branch from loop header directly to merge target. + if (flush_phi_required(block.self, block.next_block)) { - if (block.default_block != block.next_block) - { - statement("default:"); - begin_scope(); - if (is_break(block.default_block)) - SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ..."); - branch(block.self, block.default_block); - end_scope(); - } - else if (flush_phi_required(block.self, block.next_block)) + if (block.default_block == block.next_block || !literals_to_merge.empty()) { - statement("default:"); + for (auto &case_literal : literals_to_merge) + statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); + + if (block.default_block == block.next_block) + statement("default:"); + begin_scope(); flush_phi(block.self, block.next_block); statement("break;"); @@ -11518,12 +12444,15 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) } case SPIRBlock::Return: + { for (auto &line : current_function->fixup_hooks_out) line(); if (processing_entry_point) emit_fixup(); + auto &cfg = get_cfg_for_current_function(); + if (block.return_value) { auto &type = expression_type(block.return_value); @@ -11532,10 +12461,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // If we cannot return arrays, we will have a special out argument we can write to instead. // The backend is responsible for setting this up, and redirection the return values as appropriate. if (ir.ids[block.return_value].get_type() != TypeUndef) - emit_array_copy("SPIRV_Cross_return_value", block.return_value); + { + emit_array_copy("SPIRV_Cross_return_value", block.return_value, StorageClassFunction, + get_backing_variable_storage(block.return_value)); + } - if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) || - block.loop_dominator != SPIRBlock::NoDominator) + if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { statement("return;"); } @@ -11547,16 +12479,17 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) statement("return ", to_expression(block.return_value), ";"); } } - // If this block is the very final block and not called from control flow, - // we do not need an explicit return which looks out of place. Just end the function here. - // In the very weird case of for(;;) { return; } executing return is unconditional, - // but we actually need a return here ... - else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) || - block.loop_dominator != SPIRBlock::NoDominator) + else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... statement("return;"); } break; + } case SPIRBlock::Kill: statement(backend.discard_literal, ";"); @@ -11577,22 +12510,26 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) if (block.merge != SPIRBlock::MergeSelection) flush_phi(block.self, block.next_block); - // For merge selects we might have ignored the fact that a merge target - // could have been a break; or continue; - // We will need to deal with it here. - if (is_loop_break(block.next_block)) - { - // Cannot check for just break, because switch statements will also use break. - assert(block.merge == SPIRBlock::MergeSelection); - statement("break;"); - } - else if (is_continue(block.next_block)) + // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. + if (!current_emitting_switch_fallthrough) { - assert(block.merge == SPIRBlock::MergeSelection); - branch_to_continue(block.self, block.next_block); + // For merge selects we might have ignored the fact that a merge target + // could have been a break; or continue; + // We will need to deal with it here. + if (is_loop_break(block.next_block)) + { + // Cannot check for just break, because switch statements will also use break. + assert(block.merge == SPIRBlock::MergeSelection); + statement("break;"); + } + else if (is_continue(block.next_block)) + { + assert(block.merge == SPIRBlock::MergeSelection); + branch_to_continue(block.self, block.next_block); + } + else if (BlockID(block.self) != block.next_block) + emit_block_chain(get<SPIRBlock>(block.next_block)); } - else if (block.self != block.next_block) - emit_block_chain(get<SPIRBlock>(block.next_block)); } if (block.merge == SPIRBlock::MergeLoop) @@ -11636,6 +12573,20 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // Forget about control dependent expressions now. block.invalidate_expressions.clear(); + + // After we return, we must be out of scope, so if we somehow have to re-emit this function, + // re-declare variables if necessary. + assert(rearm_dominated_variables.size() == block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var = block.dominated_variables[i]; + get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i]; + } + + // Just like for deferred declaration, we need to forget about loop variable enable + // if our block chain is reinstantiated later. + for (auto &var_id : block.loop_variables) + get<SPIRVariable>(var_id).loop_variable_enable = false; } void CompilerGLSL::begin_scope() @@ -11652,6 +12603,14 @@ void CompilerGLSL::end_scope() statement("}"); } +void CompilerGLSL::end_scope(const string &trailer) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}", trailer); +} + void CompilerGLSL::end_scope_decl() { if (!indent) @@ -11708,7 +12667,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); } -void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass) { statement(lhs, " = ", to_expression(rhs_id), ";"); } @@ -11793,6 +12752,7 @@ void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &ex case BuiltInBaseVertex: case BuiltInBaseInstance: case BuiltInDrawIndex: + case BuiltInFragStencilRefEXT: expected_type = SPIRType::Int; break; @@ -11828,6 +12788,7 @@ void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &exp case BuiltInLayer: case BuiltInPrimitiveId: case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: expected_type = SPIRType::Int; break; @@ -11898,3 +12859,124 @@ void CompilerGLSL::reset_name_caches() block_names.clear(); function_overloads.clear(); } + +void CompilerGLSL::fixup_type_alias() +{ + // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. + // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type + // alias if the slave type is a block type. + ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) { + if (type.type_alias && type_is_block_like(type)) + { + // Become the master. + ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) { + if (other_id == type.self) + return; + + if (other_type.type_alias == type.type_alias) + other_type.type_alias = type.self; + }); + + this->get<SPIRType>(type.type_alias).type_alias = self; + type.type_alias = 0; + } + }); + + ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) { + if (type.type_alias && type_is_block_like(type)) + { + // This is not allowed, drop the type_alias. + type.type_alias = 0; + } + else if (type.type_alias && !type_is_block_like(this->get<SPIRType>(type.type_alias))) + { + // If the alias master is not a block-like type, there is no reason to use type aliasing. + // This case can happen if two structs are declared with the same name, but they are unrelated. + // Aliases are only used to deal with aliased types for structs which are used in different buffer types + // which all create a variant of the same struct with different DecorationOffset values. + type.type_alias = 0; + } + }); +} + +void CompilerGLSL::reorder_type_alias() +{ + // Reorder declaration of types so that the master of the type alias is always emitted first. + // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which + // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. + auto loop_lock = ir.create_loop_hard_lock(); + + auto &type_ids = ir.ids_for_type[TypeType]; + for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + { + auto &type = get<SPIRType>(*alias_itr); + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + // We will skip declaring this type, so make sure the type_alias type comes before. + auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias)); + assert(master_itr != end(type_ids)); + + if (alias_itr < master_itr) + { + // Must also swap the type order for the constant-type joined array. + auto &joined_types = ir.ids_for_constant_or_type; + auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); + auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); + assert(alt_alias_itr != end(joined_types)); + assert(alt_master_itr != end(joined_types)); + + swap(*alias_itr, *master_itr); + swap(*alt_alias_itr, *alt_master_itr); + } + } + } +} + +void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) +{ + // If we are redirecting statements, ignore the line directive. + // Common case here is continue blocks. + if (redirect_statement) + return; + + if (options.emit_line_directives) + { + require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); + statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\""); + } +} + +void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id) +{ + // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen, + // we need to know NonUniformEXT a little earlier, when the resource is actually loaded. + // Back-propagate the qualifier based on the expression dependency chain. + + if (!has_decoration(id, DecorationNonUniformEXT)) + { + set_decoration(id, DecorationNonUniformEXT); + force_recompile(); + } + + auto *e = maybe_get<SPIRExpression>(id); + auto *combined = maybe_get<SPIRCombinedImageSampler>(id); + auto *chain = maybe_get<SPIRAccessChain>(id); + if (e) + { + for (auto &expr : e->expression_dependencies) + propagate_nonuniform_qualifier(expr); + for (auto &expr : e->implied_read_expressions) + propagate_nonuniform_qualifier(expr); + } + else if (combined) + { + propagate_nonuniform_qualifier(combined->image); + propagate_nonuniform_qualifier(combined->sampler); + } + else if (chain) + { + for (auto &expr : chain->implied_read_expressions) + propagate_nonuniform_qualifier(expr); + } +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp index 184bbbd..6f59bd8 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp @@ -103,6 +103,10 @@ public: // Does not apply to shader storage or push constant blocks. bool emit_uniform_buffer_as_plain_uniforms = false; + // Emit OpLine directives if present in the module. + // May not correspond exactly to original source, but should be a good approximation. + bool emit_line_directives = false; + enum Precision { DontCare, @@ -205,7 +209,7 @@ public: // For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but // mixing int and float is not. // The name of the uniform array will be the same as the interface block name. - void flatten_buffer_block(uint32_t id); + void flatten_buffer_block(VariableID id); protected: void reset(); @@ -219,6 +223,7 @@ protected: SPIRBlock *current_emitting_block = nullptr; SPIRBlock *current_emitting_switch = nullptr; + bool current_emitting_switch_fallthrough = false; virtual void emit_instruction(const Instruction &instr); void emit_block_instructions(SPIRBlock &block); @@ -233,35 +238,40 @@ protected: virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count); virtual void emit_header(); + void emit_line_directive(uint32_t file_id, uint32_t line_literal); void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x, const SpecializationConstant &y, const SpecializationConstant &z); virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); virtual void emit_texture_op(const Instruction &i); + virtual std::string to_texture_op(const Instruction &i, bool *forward, + SmallVector<uint32_t> &inherited_expressions); virtual void emit_subgroup_op(const Instruction &i); virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0); virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage); virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0); + virtual void emit_struct_padding_target(const SPIRType &type); virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0); std::string constant_expression(const SPIRConstant &c); std::string constant_op_expression(const SPIRConstantOp &cop); virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); virtual void emit_fixup(); virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0); - virtual std::string to_func_call_arg(uint32_t id); - virtual std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); + virtual std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, - bool has_dref, uint32_t lod); - virtual std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + bool has_dref, uint32_t lod, uint32_t minlod); + virtual std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, - bool *p_forward); + uint32_t minlod, bool *p_forward); virtual void emit_buffer_block(const SPIRVariable &type); virtual void emit_push_constant_block(const SPIRVariable &var); virtual void emit_uniform(const SPIRVariable &var); - virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id); + virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool packed_type, bool row_major); StringStream<> buffer; @@ -322,6 +332,7 @@ protected: void begin_scope(); void end_scope(); + void end_scope(const std::string &trailer); void end_scope_decl(); void end_scope_decl(const std::string &decl); @@ -341,8 +352,10 @@ protected: virtual bool is_non_native_row_major_matrix(uint32_t id); virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index); - bool member_is_packed_type(const SPIRType &type, uint32_t index) const; - virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed); + bool member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const; + bool member_is_packed_physical_type(const SPIRType &type, uint32_t index) const; + virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, + uint32_t physical_type_id, bool is_packed); std::unordered_set<std::string> local_variable_names; std::unordered_set<std::string> resource_names; @@ -363,6 +376,7 @@ protected: struct BackendVariations { std::string discard_literal = "discard"; + std::string demote_literal = "demote"; std::string null_pointer_literal = ""; bool float_literal_suffix = false; bool double_literal_suffix = true; @@ -377,6 +391,7 @@ protected: const char *int16_t_literal_suffix = "s"; const char *uint16_t_literal_suffix = "us"; const char *nonuniform_qualifier = "nonuniformEXT"; + const char *boolean_mix_function = "mix"; bool swizzle_is_function = false; bool shared_is_implied = false; bool unsized_array_supported = true; @@ -387,7 +402,6 @@ protected: bool can_declare_arrays_inline = true; bool native_row_major_matrix = true; bool use_constructor_splatting = true; - bool boolean_mix_support = true; bool allow_precision_qualifiers = false; bool can_swizzle_scalar = false; bool force_gl_in_out_block = false; @@ -398,6 +412,8 @@ protected: bool array_is_value_type = true; bool comparison_image_samples_scalar = false; bool native_pointers = false; + bool support_small_type_sampling_result = false; + bool support_case_fallthrough = true; } backend; void emit_struct(SPIRType &type); @@ -412,24 +428,24 @@ protected: void emit_interface_block(const SPIRVariable &type); void emit_flattened_io_block(const SPIRVariable &var, const char *qual); void emit_block_chain(SPIRBlock &block); - void emit_hoisted_temporaries(SmallVector<std::pair<uint32_t, uint32_t>> &temporaries); + void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries); std::string constant_value_macro_name(uint32_t id); void emit_constant(const SPIRConstant &constant); void emit_specialization_constant_op(const SPIRConstantOp &constant); std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block); bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); - void propagate_loop_dominators(const SPIRBlock &block); - void branch(uint32_t from, uint32_t to); - void branch_to_continue(uint32_t from, uint32_t to); - void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block); - void flush_phi(uint32_t from, uint32_t to); - bool flush_phi_required(uint32_t from, uint32_t to); + void branch(BlockID from, BlockID to); + void branch_to_continue(BlockID from, BlockID to); + void branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block); + void flush_phi(BlockID from, BlockID to); void flush_variable_declaration(uint32_t id); void flush_undeclared_variables(SPIRBlock &block); + void emit_variable_temporary_copies(const SPIRVariable &var); bool should_dereference(uint32_t id); - bool should_forward(uint32_t id); + bool should_forward(uint32_t id) const; + bool should_suppress_usage_tracking(uint32_t id) const; void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp); void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op); bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp); @@ -445,11 +461,18 @@ protected: SPIRType::BaseType input_type, bool skip_cast_if_equal_type); void emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, SPIRType::BaseType input_type); + void emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType input_type2); + void emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + uint32_t op3, const char *op, SPIRType::BaseType offset_count_type); void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op); void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); - void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + bool negate, SPIRType::BaseType expected_type); void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type); @@ -460,7 +483,8 @@ protected: uint32_t false_value); void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); - bool expression_is_forwarded(uint32_t id); + bool expression_is_forwarded(uint32_t id) const; + bool expression_suppresses_usage_tracking(uint32_t id) const; SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, bool suppress_usage_tracking = false); @@ -494,8 +518,11 @@ protected: SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist); std::string to_expression(uint32_t id, bool register_expression_read = true); + std::string to_composite_constructor_expression(uint32_t id); + std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type); std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true); std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true); + std::string to_unpacked_row_major_matrix_expression(uint32_t id); std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true); std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true); std::string to_pointer_expression(uint32_t id, bool register_expression_read = true); @@ -517,15 +544,16 @@ protected: virtual std::string layout_for_member(const SPIRType &type, uint32_t index); virtual std::string to_interpolation_qualifiers(const Bitset &flags); std::string layout_for_variable(const SPIRVariable &variable); - std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); + std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id); virtual bool skip_argument(uint32_t id) const; - virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id); + virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, + spv::StorageClass rhs_storage); virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0, uint32_t end_offset = ~(0u)); - std::string buffer_to_packing_standard(const SPIRType &type, bool enable_std430); + std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout); uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing); uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); @@ -661,6 +689,11 @@ protected: char current_locale_radix_character = '.'; + void fixup_type_alias(); + void reorder_type_alias(); + + void propagate_nonuniform_qualifier(uint32_t id); + private: void init(); }; diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp index 46613c5..4d4e276 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp @@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) } } -string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) +string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id) { auto &imagetype = get<SPIRType>(type.image.type); const char *dim = nullptr; @@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) + { + if (interlocked_resources.count(id)) + return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype), + ">"); return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">"); + } else SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) const char *arrayed = type.image.arrayed ? "Array" : ""; const char *ms = type.image.ms ? "MS" : ""; const char *rw = typed_load ? "RW" : ""; + if (typed_load && interlocked_resources.count(id)) + rw = "RasterizerOrdered"; return join(rw, "Texture", dim, ms, arrayed, "<", typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : join(type_to_glsl(imagetype), components), @@ -1038,8 +1045,9 @@ void CompilerHLSL::emit_specialization_constants_and_structs() { bool emitted = false; SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + auto loop_lock = ir.create_loop_hard_lock(); for (auto &id_ : ir.ids_for_constant_or_type) { auto &id = ir.ids[id_]; @@ -1742,6 +1750,46 @@ void CompilerHLSL::emit_resources() end_scope(); statement(""); } + + if (requires_scalar_reflect) + { + // FP16/FP64? No templates in HLSL. + statement("float SPIRV_Cross_Reflect(float i, float n)"); + begin_scope(); + statement("return i - 2.0 * dot(n, i) * n;"); + end_scope(); + statement(""); + } + + if (requires_scalar_refract) + { + // FP16/FP64? No templates in HLSL. + statement("float SPIRV_Cross_Refract(float i, float n, float eta)"); + begin_scope(); + statement("float NoI = n * i;"); + statement("float NoI2 = NoI * NoI;"); + statement("float k = 1.0 - eta * eta * (1.0 - NoI2);"); + statement("if (k < 0.0)"); + begin_scope(); + statement("return 0.0;"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + } + + if (requires_scalar_faceforward) + { + // FP16/FP64? No templates in HLSL. + statement("float SPIRV_Cross_FaceForward(float n, float i, float nref)"); + begin_scope(); + statement("return i * nref < 0.0 ? n : -n;"); + end_scope(); + statement(""); + } } string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) @@ -1781,7 +1829,7 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type string packing_offset; bool is_push_constant = type.storage == StorageClassPushConstant; - if ((has_extended_decoration(type.self, SPIRVCrossDecorationPacked) || is_push_constant) && + if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) && has_member_decoration(type.self, index, DecorationOffset)) { uint32_t offset = memb[index].offset - base_offset; @@ -1807,16 +1855,20 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) Bitset flags = ir.get_buffer_block_flags(var); bool is_readonly = flags.get(DecorationNonWritable); bool is_coherent = flags.get(DecorationCoherent); + bool is_interlocked = interlocked_resources.count(var.self) > 0; + const char *type_name = "ByteAddressBuffer "; + if (!is_readonly) + type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer "; add_resource_name(var.self); - statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ", - to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); } else { if (type.array.empty()) { if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset)) - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); else SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset."); @@ -1902,7 +1954,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) auto &type = get<SPIRType>(var.basetype); if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end)) - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); else SPIRV_CROSS_THROW( "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset."); @@ -1973,9 +2025,9 @@ void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i } } -string CompilerHLSL::to_func_call_arg(uint32_t id) +string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { - string arg_str = CompilerGLSL::to_func_call_arg(id); + string arg_str = CompilerGLSL::to_func_call_arg(arg, id); if (hlsl_options.shader_model <= 30) return arg_str; @@ -2437,7 +2489,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t img = ops[2]; + VariableID img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; @@ -2449,6 +2501,10 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) inherited_expressions.push_back(coord); + // Make sure non-uniform decoration is back-propagated to where it needs to be. + if (has_decoration(img, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(img); + switch (op) { case OpImageSampleDrefImplicitLod: @@ -2536,6 +2592,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) uint32_t offset = 0; uint32_t coffsets = 0; uint32_t sample = 0; + uint32_t minlod = 0; uint32_t flags = 0; if (length) @@ -2562,10 +2619,14 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) test(offset, ImageOperandsOffsetMask); test(coffsets, ImageOperandsConstOffsetsMask); test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); string expr; string texop; + if (minlod != 0) + SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL."); + if (op == OpImageFetch) { if (hlsl_options.shader_model < 40) @@ -2831,7 +2892,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) // according to GLSL spec, and it depends on the sampler itself. // Just assume X == Y, so we will need to splat the result to a float2. statement("float _", id, "_tmp = ", expr, ";"); - emit_op(result_type, id, join("float2(_", id, "_tmp, _", id, "_tmp)"), true, true); + statement("float2 _", id, " = _", id, "_tmp.xx;"); + set<SPIRExpression>(id, join("_", id), result_type, true); } else { @@ -2847,7 +2909,6 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleProjDrefImplicitLod: - case OpImageQueryLod: register_control_dependent_expression(id); break; @@ -3198,8 +3259,11 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); case GLSLstd450FindILsb: - emit_unary_func_op(result_type, id, args[0], "firstbitlow"); + { + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype); break; + } case GLSLstd450FindSMsb: emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type); @@ -3240,6 +3304,59 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; } + case GLSLstd450Normalize: + // HLSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Returns -1 or 1 for valid input, sign() does the job. + emit_unary_func_op(result_type, id, args[0], "sign"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Reflect: + if (get<SPIRType>(result_type).vecsize == 1) + { + if (!requires_scalar_reflect) + { + requires_scalar_reflect = true; + force_recompile(); + } + emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Refract: + if (get<SPIRType>(result_type).vecsize == 1) + { + if (!requires_scalar_refract) + { + requires_scalar_refract = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450FaceForward: + if (get<SPIRType>(result_type).vecsize == 1) + { + if (!requires_scalar_faceforward) + { + requires_scalar_faceforward = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_FaceForward"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; @@ -3384,6 +3501,9 @@ void CompilerHLSL::emit_load(const Instruction &instruction) uint32_t id = ops[1]; uint32_t ptr = ops[2]; + if (has_decoration(ptr, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(ptr); + auto load_expr = read_access_chain(*chain); bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); @@ -3417,6 +3537,9 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val // Make sure we trigger a read of the constituents in the access chain. track_expression_read(chain.self); + if (has_decoration(chain.self, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(chain.self); + SPIRType target_type; target_type.basetype = SPIRType::UInt; target_type.vecsize = type.vecsize; @@ -3601,7 +3724,7 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) e.row_major_matrix = row_major_matrix; e.matrix_stride = matrix_stride; e.immutable = should_forward(ops[2]); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? backing_variable->self : ID(0); if (chain) { @@ -3909,6 +4032,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // If we need to do implicit bitcasts, make sure we do it with the correct type. uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); switch (opcode) { @@ -3933,22 +4057,50 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpMatrixTimesVector: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } case OpVectorTimesMatrix: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } case OpMatrixTimesMatrix: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } + case OpOuterProduct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; + + auto &type = get<SPIRType>(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + case OpFMod: { if (!requires_op_fmod) @@ -4043,7 +4195,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP_CAST(==, int_type); break; @@ -4051,12 +4203,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalEqual: case OpFOrdEqual: + case OpFUnordEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual. + // HACK: FUnordEqual will be implemented as FOrdEqual. + auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP(==); break; @@ -4068,7 +4227,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP_CAST(!=, int_type); break; @@ -4076,12 +4235,23 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + case OpFUnordNotEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + + // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here. + // We would need to do something like not(UnordEqual), but that cannot be expressed either. + // Adding a lot of NaN checks would be a breaking change from perspective of performance. + // SPIR-V will generally use isnan() checks when this even matters. + // HACK: FOrdNotEqual will be implemented as FUnordEqual. + auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP(!=); break; @@ -4092,10 +4262,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type); else HLSL_BOP_CAST(>, type); break; @@ -4107,21 +4277,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown); else HLSL_BOP(>); break; } + case OpFUnordGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpUGreaterThanEqual: case OpSGreaterThanEqual: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type); else HLSL_BOP_CAST(>=, type); break; @@ -4133,21 +4315,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown); else HLSL_BOP(>=); break; } + case OpFUnordGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThan: case OpSLessThan: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type); else HLSL_BOP_CAST(<, type); break; @@ -4159,21 +4353,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown); else HLSL_BOP(<); break; } + case OpFUnordLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThanEqual: case OpSLessThanEqual: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type); else HLSL_BOP_CAST(<=, type); break; @@ -4185,12 +4391,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown); else HLSL_BOP(<=); break; } + case OpFUnordLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpImageQueryLod: emit_texture_op(instruction); break; @@ -4343,7 +4561,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? var->self : ID(0); break; } @@ -4501,8 +4719,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) } case OpBitCount: - HLSL_UFOP(countbits); + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype); break; + } case OpBitReverse: HLSL_UFOP(reversebits); @@ -4527,6 +4748,15 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) break; } + case OpIsHelperInvocationEXT: + SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL."); + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); + break; // Nothing to do in the body + default: CompilerGLSL::emit_instruction(instruction); break; @@ -4601,7 +4831,7 @@ void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &ve remap_vertex_attributes.push_back(vertex_attributes); } -uint32_t CompilerHLSL::remap_num_workgroups_builtin() +VariableID CompilerHLSL::remap_num_workgroups_builtin() { update_active_builtins(); @@ -4683,23 +4913,28 @@ string CompilerHLSL::compile() backend.uint16_t_literal_suffix = "u"; backend.basic_int_type = "int"; backend.basic_uint_type = "uint"; + backend.demote_literal = "discard"; + backend.boolean_mix_function = ""; backend.swizzle_is_function = false; backend.shared_is_implied = true; backend.unsized_array_supported = true; backend.explicit_struct_type = false; backend.use_initializer_list = true; backend.use_constructor_splatting = false; - backend.boolean_mix_support = false; backend.can_swizzle_scalar = true; backend.can_declare_struct_inline = false; backend.can_declare_arrays_inline = false; backend.can_return_array = false; backend.nonuniform_qualifier = "NonUniformResourceIndex"; + backend.support_case_fallthrough = false; + fixup_type_alias(); + reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); validate_shader_model(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); // Subpass input needs SV_Position. if (need_subpass_input) diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp index d96c911..eb968f0 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp @@ -114,7 +114,7 @@ public: // If non-zero, this returns the variable ID of a cbuffer which corresponds to // the cbuffer declared above. By default, no binding or descriptor set decoration is set, // so the calling application should declare explicit bindings on this ID before calling compile(). - uint32_t remap_num_workgroups_builtin(); + VariableID remap_num_workgroups_builtin(); private: std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; @@ -145,7 +145,7 @@ private: std::string layout_for_member(const SPIRType &type, uint32_t index) override; std::string to_interpolation_qualifiers(const Bitset &flags) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; - std::string to_func_call_arg(uint32_t id) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_sampler_expression(uint32_t id); std::string to_resource_binding(const SPIRVariable &var); std::string to_resource_binding_sampler(const SPIRVariable &var); @@ -167,6 +167,8 @@ private: void replace_illegal_names() override; Options hlsl_options; + + // TODO: Refactor this to be more similar to MSL, maybe have some common system in place? bool requires_op_fmod = false; bool requires_fp16_packing = false; bool requires_explicit_fp16_packing = false; @@ -179,6 +181,9 @@ private: bool requires_inverse_2x2 = false; bool requires_inverse_3x3 = false; bool requires_inverse_4x4 = false; + bool requires_scalar_reflect = false; + bool requires_scalar_refract = false; + bool requires_scalar_faceforward = false; uint64_t required_textureSizeVariants = 0; void require_texture_query_variant(const SPIRType &type); diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp index 4a4f77a..d7cb138 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp @@ -28,8 +28,6 @@ using namespace std; static const uint32_t k_unknown_location = ~0u; static const uint32_t k_unknown_component = ~0u; -static const uint32_t k_aux_mbr_idx_swizzle_const = 0u; - CompilerMSL::CompilerMSL(std::vector<uint32_t> spirv_) : CompilerGLSL(move(spirv_)) { @@ -59,7 +57,14 @@ void CompilerMSL::add_msl_vertex_attribute(const MSLVertexAttr &va) void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding) { - resource_bindings.push_back({ binding, false }); + StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; + resource_bindings[tuple] = { binding, false }; +} + +void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index) +{ + SetBindingPair pair = { desc_set, binding }; + buffers_requiring_dynamic_offset[pair] = { index, 0 }; } void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) @@ -68,6 +73,17 @@ void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) argument_buffer_discrete_mask |= 1u << desc_set; } +void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage) +{ + if (desc_set < kMaxArgumentBuffers) + { + if (device_storage) + argument_buffer_device_storage_mask |= 1u << desc_set; + else + argument_buffer_device_storage_mask &= ~(1u << desc_set); + } +} + bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location) { return vtx_attrs_in_use.count(location) != 0; @@ -75,12 +91,29 @@ bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location) bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) { - auto itr = find_if(begin(resource_bindings), end(resource_bindings), - [&](const std::pair<MSLResourceBinding, bool> &resource) -> bool { - return model == resource.first.stage && desc_set == resource.first.desc_set && - binding == resource.first.binding; - }); - return itr != end(resource_bindings) && itr->second; + StageSetBinding tuple = { model, desc_set, binding }; + auto itr = resource_bindings.find(tuple); + return itr != end(resource_bindings) && itr->second.second; +} + +uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary); +} + +uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary); +} + +uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary); +} + +uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary); } void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components) @@ -93,7 +126,19 @@ void CompilerMSL::build_implicit_builtins() bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition); bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex; bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl; - if (need_subpass_input || need_sample_pos || need_vertex_params || need_tesc_params) + bool need_subgroup_mask = + active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) || + active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) || + active_input_builtins.get(BuiltInSubgroupLtMask); + bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) || + active_input_builtins.get(BuiltInSubgroupGtMask)); + bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index && + (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex)); + bool need_dispatch_base = + msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute && + (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId)); + if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || + need_multiview || need_dispatch_base || needs_subgroup_invocation_id) { bool has_frag_coord = false; bool has_sample_id = false; @@ -103,18 +148,23 @@ void CompilerMSL::build_implicit_builtins() bool has_base_instance = false; bool has_invocation_id = false; bool has_primitive_id = false; + bool has_subgroup_invocation_id = false; + bool has_subgroup_size = false; + bool has_view_idx = false; + uint32_t workgroup_id_type = 0; ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin) return; - if (need_subpass_input && ir.meta[var.self].decoration.builtin_type == BuiltInFragCoord) + BuiltIn builtin = ir.meta[var.self].decoration.builtin_type; + if (need_subpass_input && builtin == BuiltInFragCoord) { builtin_frag_coord_id = var.self; has_frag_coord = true; } - if (need_sample_pos && ir.meta[var.self].decoration.builtin_type == BuiltInSampleId) + if (need_sample_pos && builtin == BuiltInSampleId) { builtin_sample_id_id = var.self; has_sample_id = true; @@ -122,7 +172,7 @@ void CompilerMSL::build_implicit_builtins() if (need_vertex_params) { - switch (ir.meta[var.self].decoration.builtin_type) + switch (builtin) { case BuiltInVertexIndex: builtin_vertex_idx_id = var.self; @@ -147,7 +197,7 @@ void CompilerMSL::build_implicit_builtins() if (need_tesc_params) { - switch (ir.meta[var.self].decoration.builtin_type) + switch (builtin) { case BuiltInInvocationId: builtin_invocation_id_id = var.self; @@ -161,6 +211,41 @@ void CompilerMSL::build_implicit_builtins() break; } } + + if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId) + { + builtin_subgroup_invocation_id_id = var.self; + has_subgroup_invocation_id = true; + } + + if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize) + { + builtin_subgroup_size_id = var.self; + has_subgroup_size = true; + } + + if (need_multiview) + { + if (builtin == BuiltInInstanceIndex) + { + // The view index here is derived from the instance index. + builtin_instance_idx_id = var.self; + has_instance_idx = true; + } + + if (builtin == BuiltInViewIndex) + { + builtin_view_idx_id = var.self; + has_view_idx = true; + } + } + + // The base workgroup needs to have the same type and vector size + // as the workgroup or invocation ID, so keep track of the type that + // was used. + if (need_dispatch_base && workgroup_id_type == 0 && + (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId)) + workgroup_id_type = var.basetype; }); if (!has_frag_coord && need_subpass_input) @@ -188,6 +273,7 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord); builtin_frag_coord_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id); } if (!has_sample_id && need_sample_pos) @@ -214,9 +300,11 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId); builtin_sample_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id); } - if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) + if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) || + (need_multiview && (!has_instance_idx || !has_view_idx))) { uint32_t offset = ir.increase_bound_by(2); uint32_t type_id = offset; @@ -235,7 +323,7 @@ void CompilerMSL::build_implicit_builtins() auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr); ptr_type.self = type_id; - if (!has_vertex_idx) + if (need_vertex_params && !has_vertex_idx) { uint32_t var_id = ir.increase_bound_by(1); @@ -243,8 +331,10 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex); builtin_vertex_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id); } - if (!has_base_vertex) + + if (need_vertex_params && !has_base_vertex) { uint32_t var_id = ir.increase_bound_by(1); @@ -252,8 +342,10 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex); builtin_base_vertex_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id); } - if (!has_instance_idx) + + if (!has_instance_idx) // Needed by both multiview and tessellation { uint32_t var_id = ir.increase_bound_by(1); @@ -261,8 +353,10 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex); builtin_instance_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id); } - if (!has_base_instance) + + if (need_vertex_params && !has_base_instance) { uint32_t var_id = ir.increase_bound_by(1); @@ -270,6 +364,39 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance); builtin_base_instance_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id); + } + + if (need_multiview) + { + // Multiview shaders are not allowed to write to gl_Layer, ostensibly because + // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. + // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but + // gl_Layer is an output in vertex-pipeline shaders. + uint32_t type_ptr_out_id = ir.increase_bound_by(2); + SPIRType uint_type_ptr_out; + uint_type_ptr_out = uint_type; + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.parent_type = type_id; + uint_type_ptr_out.storage = StorageClassOutput; + auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out); + ptr_out_type.self = type_id; + uint32_t var_id = type_ptr_out_id + 1; + set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); + } + + if (need_multiview && !has_view_idx) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_ViewIndex. + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex); + builtin_view_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id); } } @@ -300,7 +427,9 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId); builtin_invocation_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id); } + if (!has_primitive_id) { uint32_t var_id = ir.increase_bound_by(1); @@ -309,56 +438,198 @@ void CompilerMSL::build_implicit_builtins() set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId); builtin_primitive_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id); + } + } + + if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id)) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_SubgroupInvocationID. + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_ptr; + uint_type_ptr = uint_type; + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId); + builtin_subgroup_invocation_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id); + } + + if (!has_subgroup_size && need_subgroup_ge_mask) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_SubgroupSize. + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_ptr; + uint_type_ptr = uint_type; + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize); + builtin_subgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id); + } + + if (need_dispatch_base) + { + uint32_t var_id; + if (msl_options.supports_msl_version(1, 2)) + { + // If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin + // to convey this information and save a buffer slot. + uint32_t offset = ir.increase_bound_by(1); + var_id = offset; + + set<SPIRVariable>(var_id, workgroup_id_type, StorageClassInput); + set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase); + get_entry_point().interface_variables.push_back(var_id); + } + else + { + // Otherwise, we need to fall back to a good ol' fashioned buffer. + uint32_t offset = ir.increase_bound_by(2); + var_id = offset; + uint32_t type_id = offset + 1; + + SPIRType var_type = get<SPIRType>(workgroup_id_type); + var_type.storage = StorageClassUniform; + set<SPIRType>(type_id, var_type); + + set<SPIRVariable>(var_id, type_id, StorageClassUniform); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.indirect_params_buffer_index); } + set_name(var_id, "spvDispatchBase"); + builtin_dispatch_base_id = var_id; } } - if (needs_aux_buffer_def) - { - uint32_t offset = ir.increase_bound_by(5); - uint32_t type_id = offset; - uint32_t type_arr_id = offset + 1; - uint32_t struct_id = offset + 2; - uint32_t struct_ptr_id = offset + 3; - uint32_t var_id = offset + 4; - - // Create a buffer to hold extra data, including the swizzle constants. - SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set<SPIRType>(type_id, uint_type); - - SPIRType uint_type_arr = uint_type; - uint_type_arr.array.push_back(0); - uint_type_arr.array_size_literal.push_back(true); - uint_type_arr.parent_type = type_id; - set<SPIRType>(type_arr_id, uint_type_arr); - set_decoration(type_arr_id, DecorationArrayStride, 4); - - SPIRType struct_type; - struct_type.basetype = SPIRType::Struct; - struct_type.member_types.push_back(type_arr_id); - auto &type = set<SPIRType>(struct_id, struct_type); - type.self = struct_id; - set_decoration(struct_id, DecorationBlock); - set_name(struct_id, "spvAux"); - set_member_name(struct_id, k_aux_mbr_idx_swizzle_const, "swizzleConst"); - set_member_decoration(struct_id, k_aux_mbr_idx_swizzle_const, DecorationOffset, 0); - - SPIRType struct_type_ptr = struct_type; - struct_type_ptr.pointer = true; - struct_type_ptr.parent_type = struct_id; - struct_type_ptr.storage = StorageClassUniform; - auto &ptr_type = set<SPIRType>(struct_ptr_id, struct_type_ptr); - ptr_type.self = struct_id; - - set<SPIRVariable>(var_id, struct_ptr_id, StorageClassUniform); - set_name(var_id, "spvAuxBuffer"); + if (needs_swizzle_buffer_def) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvSwizzleConstants"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, kSwizzleBufferBinding); + set_decoration(var_id, DecorationBinding, msl_options.swizzle_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.swizzle_buffer_index); + swizzle_buffer_id = var_id; + } + + if (!buffers_requiring_array_length.empty()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvBufferSizeConstants"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, kBufferSizeBufferBinding); + set_decoration(var_id, DecorationBinding, msl_options.buffer_size_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index); + buffer_size_buffer_id = var_id; + } + + if (needs_view_mask_buffer()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvViewMask"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(4u)); + set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index); + view_mask_buffer_id = var_id; + } + + if (!buffers_requiring_dynamic_offset.empty()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvDynamicOffsets"); // This should never match anything. - set_decoration(var_id, DecorationDescriptorSet, 0xFFFFFFFE); - set_decoration(var_id, DecorationBinding, msl_options.aux_buffer_index); - aux_buffer_id = var_id; + set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.dynamic_offsets_buffer_index); + dynamic_offsets_buffer_id = var_id; + } +} + +void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) +{ + Bitset *active_builtins = nullptr; + switch (storage) + { + case StorageClassInput: + active_builtins = &active_input_builtins; + break; + + case StorageClassOutput: + active_builtins = &active_output_builtins; + break; + + default: + break; } + + assert(active_builtins != nullptr); + active_builtins->set(builtin); + get_entry_point().interface_variables.push_back(id); +} + +uint32_t CompilerMSL::build_constant_uint_array_pointer() +{ + uint32_t offset = ir.increase_bound_by(4); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t type_ptr_ptr_id = offset + 2; + uint32_t var_id = offset + 3; + + // Create a buffer to hold extra data, including the swizzle constants. + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_pointer = uint_type; + uint_type_pointer.pointer = true; + uint_type_pointer.pointer_depth = 1; + uint_type_pointer.parent_type = type_id; + uint_type_pointer.storage = StorageClassUniform; + set<SPIRType>(type_ptr_id, uint_type_pointer); + set_decoration(type_ptr_id, DecorationArrayStride, 4); + + SPIRType uint_type_pointer2 = uint_type_pointer; + uint_type_pointer2.pointer_depth++; + uint_type_pointer2.parent_type = type_ptr_id; + set<SPIRType>(type_ptr_ptr_id, uint_type_pointer2); + + set<SPIRVariable>(var_id, type_ptr_ptr_id, StorageClassUniformConstant); + return var_id; } static string create_sampler_address(const char *prefix, MSLSamplerAddress addr) @@ -416,7 +687,7 @@ void CompilerMSL::emit_entry_point_declarations() // FIXME: Get test coverage here ... // Emit constexpr samplers here. - for (auto &samp : constexpr_samplers) + for (auto &samp : constexpr_samplers_by_id) { auto &var = get<SPIRVariable>(samp.first); auto &type = get<SPIRType>(var.basetype); @@ -532,9 +803,82 @@ void CompilerMSL::emit_entry_point_declarations() convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")")); } - statement("constexpr sampler ", - type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), - "(", merge(args), ");"); + // If we would emit no arguments, then omit the parentheses entirely. Otherwise, + // we'll wind up with a "most vexing parse" situation. + if (args.empty()) + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + ";"); + else + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + "(", merge(args), ");"); + } + + // Emit dynamic buffers here. + for (auto &dynamic_buffer : buffers_requiring_dynamic_offset) + { + if (!dynamic_buffer.second.second) + { + // Could happen if no buffer was used at requested binding point. + continue; + } + + const auto &var = get<SPIRVariable>(dynamic_buffer.second.second); + uint32_t var_id = var.self; + const auto &type = get_variable_data_type(var); + string name = to_name(var.self); + uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet); + uint32_t arg_id = argument_buffer_ids[desc_set]; + uint32_t base_index = dynamic_buffer.second.first; + + if (!type.array.empty()) + { + // This is complicated, because we need to support arrays of arrays. + // And it's even worse if the outermost dimension is a runtime array, because now + // all this complicated goop has to go into the shader itself. (FIXME) + if (!type.array[type.array.size() - 1]) + SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); + else + { + statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name, + type_to_array_glsl(type), " ="); + uint32_t dim = uint32_t(type.array.size()); + uint32_t j = 0; + for (SmallVector<uint32_t> indices(type.array.size()); + indices[type.array.size() - 1] < to_array_size_literal(type); j++) + { + while (dim > 0) + { + begin_scope(); + --dim; + } + + string arrays; + for (uint32_t i = uint32_t(type.array.size()); i; --i) + arrays += join("[", indices[i - 1], "]"); + statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", + to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ", + to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"), + arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),"); + + while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1) + { + end_scope(","); + indices[dim++] = 0; + } + } + end_scope_decl(); + statement_no_indent(""); + } + } + else + { + statement(get_argument_address_space(var), " auto& ", to_restrict(var_id), name, " = *(", + get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((", + get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".", + ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);"); + } } // Emit buffer arrays here. @@ -543,10 +887,10 @@ void CompilerMSL::emit_entry_point_declarations() const auto &var = get<SPIRVariable>(array_id); const auto &type = get_variable_data_type(var); string name = to_name(array_id); - statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] ="); + statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] ="); begin_scope(); - for (uint32_t i = 0; i < type.array[0]; ++i) - statement(name + "_" + convert_to_string(i) + ","); + for (uint32_t i = 0; i < to_array_size_literal(type); ++i) + statement(name, "_", i, ","); end_scope_decl(); statement_no_indent(""); } @@ -564,7 +908,7 @@ string CompilerMSL::compile() backend.float_literal_suffix = false; backend.uint32_t_literal_suffix = true; backend.int16_t_literal_suffix = ""; - backend.uint16_t_literal_suffix = "u"; + backend.uint16_t_literal_suffix = ""; backend.basic_int_type = "int"; backend.basic_uint_type = "uint"; backend.basic_int8_type = "char"; @@ -572,6 +916,8 @@ string CompilerMSL::compile() backend.basic_int16_type = "short"; backend.basic_uint16_type = "ushort"; backend.discard_literal = "discard_fragment()"; + backend.demote_literal = "unsupported-demote"; + backend.boolean_mix_function = "select"; backend.swizzle_is_function = false; backend.shared_is_implied = false; backend.use_initializer_list = true; @@ -580,34 +926,46 @@ string CompilerMSL::compile() backend.unsized_array_supported = false; backend.can_declare_arrays_inline = false; backend.can_return_array = false; - backend.boolean_mix_support = false; backend.allow_truncated_access_chain = true; backend.array_is_value_type = false; backend.comparison_image_samples_scalar = true; backend.native_pointers = true; backend.nonuniform_qualifier = ""; + backend.support_small_type_sampling_result = true; capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; - replace_illegal_names(); + // Initialize array here rather than constructor, MSVC 2013 workaround. + for (auto &id : next_metal_resource_ids) + id = 0; - struct_member_padding.clear(); + fixup_type_alias(); + replace_illegal_names(); build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); analyze_sampled_image_usage(); + analyze_interlocked_resource_usage(); + preprocess_op_codes(); build_implicit_builtins(); fixup_image_load_store_access(); set_enabled_interface_variables(get_active_interface_variables()); - if (aux_buffer_id) - active_interface_variables.insert(aux_buffer_id); - - // Preprocess OpCodes to extract the need to output additional header content - preprocess_op_codes(); + if (swizzle_buffer_id) + active_interface_variables.insert(swizzle_buffer_id); + if (buffer_size_buffer_id) + active_interface_variables.insert(buffer_size_buffer_id); + if (view_mask_buffer_id) + active_interface_variables.insert(view_mask_buffer_id); + if (dynamic_offsets_buffer_id) + active_interface_variables.insert(dynamic_offsets_buffer_id); + if (builtin_layer_id) + active_interface_variables.insert(builtin_layer_id); + if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2)) + active_interface_variables.insert(builtin_dispatch_base_id); // Create structs to hold input, output and uniform variables. // Do output first to ensure out. is declared at top of entry function. @@ -633,6 +991,7 @@ string CompilerMSL::compile() // Mark any non-stage-in structs to be tightly packed. mark_packable_structs(); + reorder_type_alias(); // Add fixup hooks required by shader inputs and outputs. This needs to happen before // the loop, so the hooks aren't added multiple times. @@ -659,6 +1018,8 @@ string CompilerMSL::compile() next_metal_resource_index_buffer = 0; next_metal_resource_index_texture = 0; next_metal_resource_index_sampler = 0; + for (auto &id : next_metal_resource_ids) + id = 0; // Move constructor for this type is broken on GCC 4.9 ... buffer.reset(); @@ -700,6 +1061,9 @@ void CompilerMSL::preprocess_op_codes() is_rasterization_disabled = true; capture_output_to_buffer = true; } + + if (preproc.needs_subgroup_invocation_id) + needs_subgroup_invocation_id = true; } // Move the Private and Workgroup global variables to the entry function. @@ -783,6 +1147,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: case OpInBoundsAccessChain: case OpAccessChain: case OpPtrAccessChain: + case OpArrayLength: { uint32_t base_id = ops[2]; if (global_var_ids.find(base_id) != global_var_ids.end()) @@ -890,7 +1255,6 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: added_out = true; } type_id = get<SPIRVariable>(arg_id).basetype; - p_type = &get<SPIRType>(type_id); uint32_t next_id = ir.increase_bound_by(1); func.add_parameter(type_id, next_id, true); set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id); @@ -962,7 +1326,7 @@ void CompilerMSL::mark_packable_structs() } // If the specified type is a struct, it and any nested structs -// are marked as packable with the SPIRVCrossDecorationPacked decoration, +// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration, void CompilerMSL::mark_as_packable(SPIRType &type) { // If this is not the base type (eg. it's a pointer or array), tunnel down @@ -974,10 +1338,10 @@ void CompilerMSL::mark_as_packable(SPIRType &type) if (type.basetype == SPIRType::Struct) { - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked); // Recurse - size_t mbr_cnt = type.member_types.size(); + uint32_t mbr_cnt = uint32_t(type.member_types.size()); for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) { uint32_t mbr_type_id = type.member_types[mbr_idx]; @@ -1080,7 +1444,7 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co else if (!strip_array) ir.meta[var.self].decoration.qualified_alias = qual_var_name; - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage == StorageClassOutput && var.initializer != ID(0)) { entry_func.fixup_hooks_in.push_back( [=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); }); @@ -1768,8 +2132,7 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t bool in_array = false; for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++) { - auto &mbr_dec = ir.meta[ib_type_id].members[i]; - uint32_t var_id = mbr_dec.extended.ib_orig_id; + uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID); if (!var_id) continue; auto &var = get<SPIRVariable>(var_id); @@ -1823,21 +2186,50 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t // Returns the ID of the newly added variable, or zero if no variable was added. uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) { - // Accumulate the variables that should appear in the interface struct + // Accumulate the variables that should appear in the interface struct. SmallVector<SPIRVariable *> vars; - bool incl_builtins = (storage == StorageClassOutput || is_tessellation_shader()); + bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader(); + bool has_seen_barycentric = false; ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) { + if (var.storage != storage) + return; + auto &type = this->get<SPIRType>(var.basetype); - BuiltIn bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); - if (var.storage == storage && interface_variable_exists_in_entry_point(var.self) && - !is_hidden_variable(var, incl_builtins) && type.pointer && - (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch && - (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || - bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || - bi_type == BuiltInViewportIndex || bi_type == BuiltInFragDepth || bi_type == BuiltInSampleMask || - (get_execution_model() == ExecutionModelTessellationEvaluation && - (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)))) + + bool is_builtin = is_builtin_variable(var); + auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + + // These builtins are part of the stage in/out structs. + bool is_interface_block_builtin = + (bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance || + bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || + bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || bi_type == BuiltInFragDepth || + bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask) || + (get_execution_model() == ExecutionModelTessellationEvaluation && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); + + bool is_active = interface_variable_exists_in_entry_point(var.self); + if (is_builtin && is_active) + { + // Only emit the builtin if it's active in this entry point. Interface variable list might lie. + is_active = has_active_builtin(bi_type, storage); + } + + bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch; + + bool hidden = is_hidden_variable(var, incl_builtins); + // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments. + if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV)) + { + if (has_seen_barycentric) + SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL."); + has_seen_barycentric = true; + hidden = false; + } + + if (is_active && !hidden && type.pointer && filter_patch_decoration && + (!is_builtin || is_interface_block_builtin)) { vars.push_back(&var); } @@ -1957,7 +2349,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref); set_name(ib_var_id, ib_var_ref); - for (auto p_var : vars) + for (auto *p_var : vars) { bool strip_array = (get_execution_model() == ExecutionModelTessellationControl || @@ -2055,7 +2447,8 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil auto &type = get<SPIRType>(type_id); if ((builtin == BuiltInSampleMask && is_array(type)) || - ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex) && type.basetype != SPIRType::UInt)) + ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex || builtin == BuiltInFragStencilRefEXT) && + type.basetype != SPIRType::UInt)) { uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); uint32_t base_type_id = next_id++; @@ -2163,188 +2556,621 @@ uint32_t CompilerMSL::ensure_correct_attribute_type(uint32_t type_id, uint32_t l return type_id; } +void CompilerMSL::mark_struct_members_packed(const SPIRType &type) +{ + set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked); + + // Problem case! Struct needs to be placed at an awkward alignment. + // Mark every member of the child struct as packed. + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + auto &mbr_type = get<SPIRType>(type.member_types[i]); + if (mbr_type.basetype == SPIRType::Struct) + { + // Recursively mark structs as packed. + auto *struct_type = &mbr_type; + while (!struct_type->array.empty()) + struct_type = &get<SPIRType>(struct_type->parent_type); + mark_struct_members_packed(*struct_type); + } + else if (!is_scalar(mbr_type)) + set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked); + } +} + +void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type) +{ + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + auto &mbr_type = get<SPIRType>(type.member_types[i]); + if (mbr_type.basetype == SPIRType::Struct) + { + auto *struct_type = &mbr_type; + while (!struct_type->array.empty()) + struct_type = &get<SPIRType>(struct_type->parent_type); + + if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked)) + continue; + + uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i); + uint32_t msl_size = get_declared_struct_member_size_msl(type, i); + uint32_t spirv_offset = type_struct_member_offset(type, i); + uint32_t spirv_offset_next; + if (i + 1 < mbr_cnt) + spirv_offset_next = type_struct_member_offset(type, i + 1); + else + spirv_offset_next = spirv_offset + msl_size; + + // Both are complicated cases. In scalar layout, a struct of float3 might just consume 12 bytes, + // and the next member will be placed at offset 12. + bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0; + bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next; + uint32_t array_stride = 0; + bool struct_needs_explicit_padding = false; + + // Verify that if a struct is used as an array that ArrayStride matches the effective size of the struct. + if (!mbr_type.array.empty()) + { + array_stride = type_struct_member_array_stride(type, i); + uint32_t dimensions = uint32_t(mbr_type.array.size() - 1); + for (uint32_t dim = 0; dim < dimensions; dim++) + { + uint32_t array_size = to_array_size_literal(mbr_type, dim); + array_stride /= max(array_size, 1u); + } + + // Set expected struct size based on ArrayStride. + struct_needs_explicit_padding = true; + + // If struct size is larger than array stride, we might be able to fit, if we tightly pack. + if (get_declared_struct_size_msl(*struct_type) > array_stride) + struct_is_too_large = true; + } + + if (struct_is_misaligned || struct_is_too_large) + mark_struct_members_packed(*struct_type); + mark_scalar_layout_structs(*struct_type); + + if (struct_needs_explicit_padding) + { + msl_size = get_declared_struct_size_msl(*struct_type, true, true); + if (array_stride < msl_size) + { + SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type."); + } + else + { + if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget)) + { + if (array_stride != + get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget)) + SPIRV_CROSS_THROW( + "A struct is used with different array strides. Cannot express this in MSL."); + } + else + set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride); + } + } + } + } +} + // Sort the members of the struct type by offset, and pack and then pad members where needed // to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing // occurs first, followed by padding, because packing a member reduces both its size and its // natural alignment, possibly requiring a padding member to be added ahead of it. -void CompilerMSL::align_struct(SPIRType &ib_type) +void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set<uint32_t> &aligned_structs) { - uint32_t &ib_type_id = ib_type.self; + // We align structs recursively, so stop any redundant work. + ID &ib_type_id = ib_type.self; + if (aligned_structs.count(ib_type_id)) + return; + aligned_structs.insert(ib_type_id); // Sort the members of the interface structure by their offset. // They should already be sorted per SPIR-V spec anyway. MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset); member_sorter.sort(); - uint32_t mbr_cnt = uint32_t(ib_type.member_types.size()); + auto mbr_cnt = uint32_t(ib_type.member_types.size()); + + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + // Pack any dependent struct types before we pack a parent struct. + auto &mbr_type = get<SPIRType>(ib_type.member_types[mbr_idx]); + if (mbr_type.basetype == SPIRType::Struct) + align_struct(mbr_type, aligned_structs); + } // Test the alignment of each member, and if a member should be closer to the previous // member than the default spacing expects, it is likely that the previous member is in // a packed format. If so, and the previous member is packable, pack it. - // For example...this applies to any 3-element vector that is followed by a scalar. - uint32_t curr_offset = 0; + // For example ... this applies to any 3-element vector that is followed by a scalar. + uint32_t msl_offset = 0; for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) { - if (is_member_packable(ib_type, mbr_idx)) - { - set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked); - set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType, - ib_type.member_types[mbr_idx]); - } + // This checks the member in isolation, if the member needs some kind of type remapping to conform to SPIR-V + // offsets, array strides and matrix strides. + ensure_member_packing_rules_msl(ib_type, mbr_idx); - // Align current offset to the current member's default alignment. - size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1; - uint32_t aligned_curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask); + // Align current offset to the current member's default alignment. If the member was packed, it will observe + // the updated alignment here. + uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1; + uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; // Fetch the member offset as declared in the SPIRV. - uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); - if (mbr_offset > aligned_curr_offset) + uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); + if (spirv_mbr_offset > aligned_msl_offset) { // Since MSL and SPIR-V have slightly different struct member alignment and - // size rules, we'll pad to standard C-packing rules. If the member is farther + // size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther // away than C-packing, expects, add an inert padding member before the the member. - MSLStructMemberKey key = get_struct_member_key(ib_type_id, mbr_idx); - struct_member_padding[key] = mbr_offset - curr_offset; + uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset; + set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes); + + // Re-align as a sanity check that aligning post-padding matches up. + msl_offset += padding_bytes; + aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; + } + else if (spirv_mbr_offset < aligned_msl_offset) + { + // This should not happen, but deal with unexpected scenarios. + // It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V. + SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL."); } + assert(aligned_msl_offset == spirv_mbr_offset); + // Increment the current offset to be positioned immediately after the current member. // Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here. if (mbr_idx + 1 < mbr_cnt) - curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx)); + msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx); } } -// Returns whether the specified struct member supports a packable type -// variation that is smaller than the unpacked variation of that type. -bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index) +bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const { - // We've already marked it as packable - if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked)) - return true; - - auto &mbr_type = get<SPIRType>(ib_type.member_types[index]); - - uint32_t component_size = mbr_type.width / 8; - uint32_t unpacked_mbr_size; - if (mbr_type.vecsize == 3) - unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns; - else - unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns; + auto &mbr_type = get<SPIRType>(type.member_types[index]); + uint32_t spirv_offset = get_member_decoration(type.self, index, DecorationOffset); + + if (index + 1 < type.member_types.size()) + { + // First, we will check offsets. If SPIR-V offset + MSL size > SPIR-V offset of next member, + // we *must* perform some kind of remapping, no way getting around it. + // We can always pad after this member if necessary, so that case is fine. + uint32_t spirv_offset_next = get_member_decoration(type.self, index + 1, DecorationOffset); + assert(spirv_offset_next >= spirv_offset); + uint32_t maximum_size = spirv_offset_next - spirv_offset; + uint32_t msl_mbr_size = get_declared_struct_member_size_msl(type, index); + if (msl_mbr_size > maximum_size) + return false; + } - // Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead, - // but we will use the same mechanism. - if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 && - type_struct_member_array_stride(ib_type, index) == 4 * component_size) + if (!mbr_type.array.empty()) { - return true; + // If we have an array type, array stride must match exactly with SPIR-V. + uint32_t spirv_array_stride = type_struct_member_array_stride(type, index); + uint32_t msl_array_stride = get_declared_struct_member_array_stride_msl(type, index); + if (spirv_array_stride != msl_array_stride) + return false; } - // Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself. - // This can happen for struct A { float a }; A a[]; in std140 layout. - // TODO: Emit a padded struct which can be used for this purpose. - if (is_array(mbr_type) && mbr_type.basetype == SPIRType::Struct) + if (is_matrix(mbr_type)) { - size_t declared_struct_size = get_declared_struct_size(mbr_type); - size_t alignment = get_declared_struct_member_alignment(ib_type, index); - declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1); - if (type_struct_member_array_stride(ib_type, index) > declared_struct_size) - return true; + // Need to check MatrixStride as well. + uint32_t spirv_matrix_stride = type_struct_member_matrix_stride(type, index); + uint32_t msl_matrix_stride = get_declared_struct_member_matrix_stride_msl(type, index); + if (spirv_matrix_stride != msl_matrix_stride) + return false; } - // TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column. - //if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16) - // SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout."); - - // Only vectors or 3-row matrices need to be packed. - if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3)) + // Now, we check alignment. + uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, index); + if ((spirv_offset % msl_alignment) != 0) return false; - // Only row-major matrices need to be packed. - if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor)) - return false; + // We're in the clear. + return true; +} - if (is_array(mbr_type)) - { - // If member is an array, and the array stride is larger than the type needs, don't pack it. - // Take into consideration multi-dimentional arrays. - uint32_t md_elem_cnt = 1; - size_t last_elem_idx = mbr_type.array.size() - 1; - for (uint32_t i = 0; i < last_elem_idx; i++) - md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1u); +// Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions. +// If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types. +// In odd cases we need to emit packed and remapped types, for e.g. weird matrices or arrays with weird array strides. +void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index) +{ + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We failed validation. + // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite + // match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule + // that struct alignment == max alignment of all members and struct size depends on this alignment. + auto &mbr_type = get<SPIRType>(ib_type.member_types[index]); + if (mbr_type.basetype == SPIRType::Struct) + SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct."); - uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt; + // Perform remapping here. + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + + // Try validating again, now with packed. + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect. + // A lot of work goes here ... + // We will need remapping on Load and Store to translate the types between Logical and Physical. + + // First, we check if we have small vector std140 array. + // We detect this if we have an array of vectors, and array stride is greater than number of elements. + if (!mbr_type.array.empty() && !is_matrix(mbr_type)) + { uint32_t array_stride = type_struct_member_array_stride(ib_type, index); - return unpacked_array_stride > array_stride; + + // Hack off array-of-arrays until we find the array stride per element we must have to make it work. + uint32_t dimensions = uint32_t(mbr_type.array.size() - 1); + for (uint32_t dim = 0; dim < dimensions; dim++) + array_stride /= max(to_array_size_literal(mbr_type, dim), 1u); + + uint32_t elems_per_stride = array_stride / (mbr_type.width / 8); + + if (elems_per_stride == 3) + SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios."); + else if (elems_per_stride > 4) + SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL."); + + auto physical_type = mbr_type; + physical_type.vecsize = elems_per_stride; + physical_type.parent_type = 0; + uint32_t type_id = ir.increase_bound_by(1); + set<SPIRType>(type_id, physical_type); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id); + set_decoration(type_id, DecorationArrayStride, array_stride); + + // Remove packed_ for vectors of size 1, 2 and 4. + if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked)) + SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix " + "scalar and std140 layout rules."); + else + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); } - else + else if (is_matrix(mbr_type)) { - uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset); - // For vectors, pack if the member's offset doesn't conform to the - // type's usual alignment. For example, a float3 at offset 4. - if (!is_matrix(mbr_type) && (mbr_offset_curr % unpacked_mbr_size)) - return true; - // Pack if there is not enough space between this member and next. - // If last member, only pack if it's a row-major matrix. - if (index < ib_type.member_types.size() - 1) - { - uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset); - return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr; - } + // MatrixStride might be std140-esque. + uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index); + + uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8); + + if (elems_per_stride == 3) + SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios."); + else if (elems_per_stride > 4) + SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL."); + + bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor); + + auto physical_type = mbr_type; + physical_type.parent_type = 0; + if (row_major) + physical_type.columns = elems_per_stride; + else + physical_type.vecsize = elems_per_stride; + uint32_t type_id = ir.increase_bound_by(1); + set<SPIRType>(type_id, physical_type); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id); + + // Remove packed_ for vectors of size 1, 2 and 4. + if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked)) + SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix " + "scalar and std140 layout rules."); else - return is_matrix(mbr_type); + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); } -} -// Returns a combination of type ID and member index for use as hash key -MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t index) -{ - MSLStructMemberKey k = type_id; - k <<= 32; - k += index; - return k; + // This better validate now, or we must fail gracefully. + if (!validate_member_packing_rules_msl(ib_type, index)) + SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL."); } void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) { - if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) || - get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0) + auto &type = expression_type(rhs_expression); + + bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID); + bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked); + auto *lhs_e = maybe_get<SPIRExpression>(lhs_expression); + auto *rhs_e = maybe_get<SPIRExpression>(rhs_expression); + + bool transpose = lhs_e && lhs_e->need_transpose; + + // No physical type remapping, and no packed type, so can just emit a store directly. + if (!lhs_remapped_type && !lhs_packed_type) + { + // We might not be dealing with remapped physical types or packed types, + // but we might be doing a clean store to a row-major matrix. + // In this case, we just flip transpose states, and emit the store, a transpose must be in the RHS expression, if any. + if (is_matrix(type) && lhs_e && lhs_e->need_transpose) + { + if (!rhs_e) + SPIRV_CROSS_THROW("Need to transpose right-side expression of a store to row-major matrix, but it is " + "not a SPIRExpression."); + lhs_e->need_transpose = false; + + if (rhs_e && rhs_e->need_transpose) + { + // Direct copy, but might need to unpack RHS. + // Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T. + rhs_e->need_transpose = false; + statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression), + ";"); + rhs_e->need_transpose = true; + } + else + statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");"); + + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else if (lhs_e && lhs_e->need_transpose) + { + lhs_e->need_transpose = false; + + // Storing a column to a row-major matrix. Unroll the write. + for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_dereferenced_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } + } + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + } + else if (!lhs_remapped_type && !is_matrix(type) && !transpose) { + // Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly, + // since they are declared as array of vectors instead, and we need the fallback path below. CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); } else { - // Special handling when storing to a float[] or float2[] in std140 layout. + // Special handling when storing to a remapped physical type. + // This is mostly to deal with std140 padded matrices or vectors. + + TypeID physical_type_id = lhs_remapped_type ? + ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) : + type.self; + + auto &physical_type = get<SPIRType>(physical_type_id); + + static const char *swizzle_lut[] = { + ".x", + ".xy", + ".xyz", + "", + }; + + if (is_matrix(type)) + { + // Packed matrices are stored as arrays of packed vectors, so we need + // to assign the vectors one at a time. + // For row-major matrices, we need to transpose the *right-hand* side, + // not the left-hand side. - auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType)); - string lhs = to_dereferenced_expression(lhs_expression); - string rhs = to_pointer_expression(rhs_expression); + // Lots of cases to cover here ... - // Unpack the expression so we can store to it with a float or float2. - // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead. - if (is_scalar(type) && is_array(type)) - lhs = enclose_expression(lhs) + ".x"; - else if (is_vector(type) && type.vecsize == 2 && is_array(type)) - lhs = enclose_expression(lhs) + ".xy"; + bool rhs_transpose = rhs_e && rhs_e->need_transpose; + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = false; + + if (transpose) + { + // We're dealing with transpose manually. + lhs_e->need_transpose = false; + + const char *store_swiz = ""; + if (physical_type.columns != type.columns) + store_swiz = swizzle_lut[type.columns - 1]; + + if (rhs_transpose) + { + // If RHS is also transposed, we can just copy row by row. + for (uint32_t i = 0; i < type.vecsize; i++) + { + statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", + to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];"); + } + } + else + { + auto vector_type = expression_type(rhs_expression); + vector_type.vecsize = vector_type.columns; + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. + for (uint32_t i = 0; i < type.vecsize; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", rhs_row, ";"); + } + } + + // We're dealing with transpose manually. + lhs_e->need_transpose = true; + } + else + { + const char *store_swiz = ""; + if (physical_type.vecsize != type.vecsize) + store_swiz = swizzle_lut[type.vecsize - 1]; + + if (rhs_transpose) + { + auto vector_type = expression_type(rhs_expression); + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. + for (uint32_t i = 0; i < type.columns; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + // Need to explicitly unpack expression since we've mucked with transpose state. + auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression); + rhs_row += join(unpacked_expr, "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", rhs_row, ";"); + } + } + else + { + // Copy column-by-column. + for (uint32_t i = 0; i < type.columns; i++) + { + statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", + to_enclosed_unpacked_expression(rhs_expression), "[", i, "];"); + } + } + } + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = true; + } + else if (transpose) + { + lhs_e->need_transpose = false; + + // Storing a column to a row-major matrix. Unroll the write. + for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_enclosed_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } + } + + lhs_e->need_transpose = true; + } + else if ((is_matrix(physical_type) || is_array(physical_type)) && physical_type.vecsize > type.vecsize) + { + assert(type.vecsize >= 1 && type.vecsize <= 3); + + // If we have packed types, we cannot use swizzled stores. + // We could technically unroll the store for each element if needed. + // When remapping to a std140 physical type, we always get float4, + // and the packed decoration should always be removed. + assert(!lhs_packed_type); + + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); + + // Unpack the expression so we can store to it with a float or float2. + // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead. + lhs = enclose_expression(lhs) + swizzle_lut[type.vecsize - 1]; + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + else if (!is_matrix(type)) + { + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } - if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) - statement(lhs, " = ", rhs, ";"); register_write(lhs_expression); } } // Converts the format of the current expression from packed to unpacked, // by wrapping the expression in a constructor of the appropriate type. -string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id) +// Also, handle special physical ID remapping scenarios, similar to emit_store_statement(). +string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool packed, bool row_major) { - const SPIRType *packed_type = nullptr; - if (packed_type_id) - packed_type = &get<SPIRType>(packed_type_id); + // Trivial case, nothing to do. + if (physical_type_id == 0 && !packed) + return expr_str; + + const SPIRType *physical_type = nullptr; + if (physical_type_id) + physical_type = &get<SPIRType>(physical_type_id); + + static const char *swizzle_lut[] = { + ".x", + ".xy", + ".xyz", + }; + + // std140 array cases for vectors. + if (physical_type && is_vector(*physical_type) && is_array(*physical_type) && physical_type->vecsize > type.vecsize) + { + assert(type.vecsize >= 1 && type.vecsize <= 3); + return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1]; + } + else if (is_matrix(type)) + { + // Packed matrices are stored as arrays of packed vectors. Unfortunately, + // we can't just pass the array straight to the matrix constructor. We have to + // pass each vector individually, so that they can be unpacked to normal vectors. + if (!physical_type) + physical_type = &type; + + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; + if (row_major) + swap(vecsize, columns); + + uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize; + + const char *base_type = type.width == 16 ? "half" : "float"; + string unpack_expr = join(base_type, columns, "x", vecsize, "("); + + const char *load_swiz = ""; - // float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead. - if (packed_type && is_array(*packed_type) && is_scalar(*packed_type)) - return enclose_expression(expr_str) + ".x"; - else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2) - return enclose_expression(expr_str) + ".xy"; + if (physical_vecsize != vecsize) + load_swiz = swizzle_lut[vecsize - 1]; + + for (uint32_t i = 0; i < columns; i++) + { + if (i > 0) + unpack_expr += ", "; + + if (packed) + unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz); + else + unpack_expr += join(expr_str, "[", i, "]", load_swiz); + } + + unpack_expr += ")"; + return unpack_expr; + } else return join(type_to_glsl(type), "(", expr_str, ")"); } @@ -2399,6 +3225,39 @@ void CompilerMSL::emit_custom_functions() if (spv_function_implementations.count(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i))) spv_function_implementations.insert(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i - 1)); + if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler)) + { + // Unfortunately, this one needs a lot of the other functions to compile OK. + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW( + "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0."); + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplTextureSwizzle); + if (msl_options.swizzle_texture_samples) + spv_function_implementations.insert(SPVFuncImplGatherSwizzle); + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + spv_function_implementations.insert(static_cast<SPVFuncImpl>(i)); + spv_function_implementations.insert(SPVFuncImplExpandITUFullRange); + spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020); + } + + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + if (spv_function_implementations.count(static_cast<SPVFuncImpl>(i))) + spv_function_implementations.insert(SPVFuncImplForwardArgs); + + if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle)) + { + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplGetSwizzle); + } + for (auto &spv_func : spv_function_implementations) { switch (spv_func) @@ -2406,7 +3265,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplMod: statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()"); statement("template<typename Tx, typename Ty>"); - statement("Tx mod(Tx x, Ty y)"); + statement("inline Tx mod(Tx x, Ty y)"); begin_scope(); statement("return x - y * floor(x / y);"); end_scope(); @@ -2416,7 +3275,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplRadians: statement("// Implementation of the GLSL radians() function"); statement("template<typename T>"); - statement("T radians(T d)"); + statement("inline T radians(T d)"); begin_scope(); statement("return d * T(0.01745329251);"); end_scope(); @@ -2426,7 +3285,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplDegrees: statement("// Implementation of the GLSL degrees() function"); statement("template<typename T>"); - statement("T degrees(T r)"); + statement("inline T degrees(T r)"); begin_scope(); statement("return r * T(57.2957795131);"); end_scope(); @@ -2436,7 +3295,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindILsb: statement("// Implementation of the GLSL findLSB() function"); statement("template<typename T>"); - statement("T findLSB(T x)"); + statement("inline T spvFindLSB(T x)"); begin_scope(); statement("return select(ctz(x), T(-1), x == T(0));"); end_scope(); @@ -2446,7 +3305,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindUMsb: statement("// Implementation of the unsigned GLSL findMSB() function"); statement("template<typename T>"); - statement("T findUMSB(T x)"); + statement("inline T spvFindUMSB(T x)"); begin_scope(); statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));"); end_scope(); @@ -2456,7 +3315,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindSMsb: statement("// Implementation of the signed GLSL findMSB() function"); statement("template<typename T>"); - statement("T findSMSB(T x)"); + statement("inline T spvFindSMSB(T x)"); begin_scope(); statement("T v = select(x, T(-1) - x, x < T(0));"); statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));"); @@ -2467,7 +3326,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplSSign: statement("// Implementation of the GLSL sign() function for integer types"); statement("template<typename T, typename E = typename enable_if<is_integral<T>::value>::type>"); - statement("T sign(T x)"); + statement("inline T sign(T x)"); begin_scope(); statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));"); end_scope(); @@ -2475,40 +3334,27 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplArrayCopy: - statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via " - "assignment."); - statement("template<typename T, uint N>"); - statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - - statement("template<typename T, uint N>"); - statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - break; - case SPVFuncImplArrayOfArrayCopy2Dim: case SPVFuncImplArrayOfArrayCopy3Dim: case SPVFuncImplArrayOfArrayCopy4Dim: case SPVFuncImplArrayOfArrayCopy5Dim: case SPVFuncImplArrayOfArrayCopy6Dim: { + // Unfortunately we cannot template on the address space, so combinatorial explosion it is. static const char *function_name_tags[] = { - "FromStack", - "FromConstant", + "FromConstantToStack", "FromConstantToThreadGroup", "FromStackToStack", + "FromStackToThreadGroup", "FromThreadGroupToStack", "FromThreadGroupToThreadGroup", }; static const char *src_address_space[] = { - "thread const", - "constant", + "constant", "constant", "thread const", "thread const", "threadgroup const", "threadgroup const", }; - for (uint32_t variant = 0; variant < 2; variant++) + static const char *dst_address_space[] = { + "thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup", + }; + + for (uint32_t variant = 0; variant < 6; variant++) { uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; string tmp = "template<typename T"; @@ -2528,17 +3374,23 @@ void CompilerMSL::emit_custom_functions() array_arg += "]"; } - statement("void spvArrayCopy", function_name_tags[variant], dimensions, "(thread T (&dst)", array_arg, - ", ", src_address_space[variant], " T (&src)", array_arg, ")"); + statement("inline void spvArrayCopy", function_name_tags[variant], dimensions, "(", + dst_address_space[variant], " T (&dst)", array_arg, ", ", src_address_space[variant], + " T (&src)", array_arg, ")"); begin_scope(); statement("for (uint i = 0; i < A; i++)"); begin_scope(); - statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);"); + + if (dimensions == 1) + statement("dst[i] = src[i];"); + else + statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);"); end_scope(); end_scope(); statement(""); } + break; } @@ -2546,7 +3398,7 @@ void CompilerMSL::emit_custom_functions() { string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width); statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); - statement("uint2 spvTexelBufferCoord(uint tc)"); + statement("inline uint2 spvTexelBufferCoord(uint tc)"); begin_scope(); statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");")); end_scope(); @@ -2572,7 +3424,7 @@ void CompilerMSL::emit_custom_functions() statement(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float4x4 spvInverse4x4(float4x4 m)"); + statement("inline float4x4 spvInverse4x4(float4x4 m)"); begin_scope(); statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -2637,7 +3489,7 @@ void CompilerMSL::emit_custom_functions() statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float3x3 spvInverse3x3(float3x3 m)"); + statement("inline float3x3 spvInverse3x3(float3x3 m)"); begin_scope(); statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -2667,7 +3519,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplInverse2x2: statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float2x2 spvInverse2x2(float2x2 m)"); + statement("inline float2x2 spvInverse2x2(float2x2 m)"); begin_scope(); statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -2688,65 +3540,24 @@ void CompilerMSL::emit_custom_functions() statement(""); break; - case SPVFuncImplRowMajor2x3: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)"); - begin_scope(); - statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor2x4: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)"); - begin_scope(); - statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], " - "m[1][3]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor3x2: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)"); - begin_scope(); - statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor3x4: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)"); - begin_scope(); - statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], " - "m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor4x2: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)"); + case SPVFuncImplForwardArgs: + statement("template<typename T> struct spvRemoveReference { typedef T type; };"); + statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };"); + statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };"); + statement("template<typename T> inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference<T>::type& x)"); begin_scope(); - statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), " - "float2(m[1][1], m[3][1]));"); + statement("return static_cast<thread T&&>(x);"); end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor4x3: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)"); + statement("template<typename T> inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference<T>::type&& x)"); begin_scope(); - statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), " - "float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));"); + statement("return static_cast<thread T&&>(x);"); end_scope(); statement(""); break; - case SPVFuncImplTextureSwizzle: + case SPVFuncImplGetSwizzle: statement("enum class spvSwizzle : uint"); begin_scope(); statement("none = 0,"); @@ -2758,20 +3569,6 @@ void CompilerMSL::emit_custom_functions() statement("alpha"); end_scope_decl(); statement(""); - statement("template<typename T> struct spvRemoveReference { typedef T type; };"); - statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };"); - statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };"); - statement("template<typename T> inline constexpr thread T&& spvForward(thread typename " - "spvRemoveReference<T>::type& x)"); - begin_scope(); - statement("return static_cast<thread T&&>(x);"); - end_scope(); - statement("template<typename T> inline constexpr thread T&& spvForward(thread typename " - "spvRemoveReference<T>::type&& x)"); - begin_scope(); - statement("return static_cast<thread T&&>(x);"); - end_scope(); - statement(""); statement("template<typename T>"); statement("inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)"); begin_scope(); @@ -2794,6 +3591,9 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplTextureSwizzle: statement("// Wrapper function that swizzles texture samples and fetches."); statement("template<typename T>"); statement("inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)"); @@ -2812,11 +3612,14 @@ void CompilerMSL::emit_custom_functions() statement("return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;"); end_scope(); statement(""); + break; + + case SPVFuncImplGatherSwizzle: statement("// Wrapper function that swizzles texture gathers."); - statement("template<typename T, typename Tex, typename... Ts>"); - statement( - "inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) " - "METAL_CONST_ARG(c)"); + statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, " + "typename... Ts>"); + statement("inline vec<T, 4> spvGatherSwizzle(const thread Tex<T>& t, sampler s, " + "uint sw, component c, Ts... params) METAL_CONST_ARG(c)"); begin_scope(); statement("if (sw)"); begin_scope(); @@ -2853,10 +3656,14 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplGatherCompareSwizzle: statement("// Wrapper function that swizzles depth texture gathers."); - statement("template<typename T, typename Tex, typename... Ts>"); - statement( - "inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) "); + statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, " + "typename... Ts>"); + statement("inline vec<T, 4> spvGatherCompareSwizzle(const thread Tex<T>& t, sampler " + "s, uint sw, Ts... params) "); begin_scope(); statement("if (sw)"); begin_scope(); @@ -2877,6 +3684,828 @@ void CompilerMSL::emit_custom_functions() statement("return t.gather_compare(s, spvForward<Ts>(params)...);"); end_scope(); statement(""); + break; + + case SPVFuncImplSubgroupBallot: + statement("inline uint4 spvSubgroupBallot(bool value)"); + begin_scope(); + statement("simd_vote vote = simd_ballot(value);"); + statement("// simd_ballot() returns a 64-bit integer-like object, but"); + statement("// SPIR-V callers expect a uint4. We must convert."); + statement("// FIXME: This won't include higher bits if Apple ever supports"); + statement("// 128 lanes in an SIMD-group."); + statement("return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> " + "32) & 0xFFFFFFFF), 0, 0);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotBitExtract: + statement("inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)"); + begin_scope(); + statement("return !!extract_bits(ballot[bit / 32], bit % 32, 1);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotFindLSB: + statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)"); + begin_scope(); + statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + " + "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotFindMSB: + statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)"); + begin_scope(); + statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - " + "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), " + "ballot.z == 0), ballot.w == 0);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotBitCount: + statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)"); + begin_scope(); + statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);"); + end_scope(); + statement(""); + statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); + begin_scope(); + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " + "uint2(0));"); + statement("return spvSubgroupBallotBitCount(ballot & mask);"); + end_scope(); + statement(""); + statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); + begin_scope(); + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));"); + statement("return spvSubgroupBallotBitCount(ballot & mask);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupAllEqual: + // Metal doesn't provide a function to evaluate this directly. But, we can + // implement this by comparing every thread's value to one thread's value + // (in this case, the value of the first active thread). Then, by the transitive + // property of equality, if all comparisons return true, then they are all equal. + statement("template<typename T>"); + statement("inline bool spvSubgroupAllEqual(T value)"); + begin_scope(); + statement("return simd_all(value == simd_broadcast_first(value));"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupAllEqual(bool value)"); + begin_scope(); + statement("return simd_all(value) || !simd_any(value);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplReflectScalar: + // Metal does not support scalar versions of these functions. + statement("template<typename T>"); + statement("inline T spvReflect(T i, T n)"); + begin_scope(); + statement("return i - T(2) * i * n * n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRefractScalar: + // Metal does not support scalar versions of these functions. + statement("template<typename T>"); + statement("inline T spvRefract(T i, T n, T eta)"); + begin_scope(); + statement("T NoI = n * i;"); + statement("T NoI2 = NoI * NoI;"); + statement("T k = T(1) - eta * eta * (T(1) - NoI2);"); + statement("if (k < T(0))"); + begin_scope(); + statement("return T(0);"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + break; + + case SPVFuncImplFaceForwardScalar: + // Metal does not support scalar versions of these functions. + statement("template<typename T>"); + statement("inline T spvFaceForward(T n, T i, T nref)"); + begin_scope(); + statement("return i * nref < T(0) ? n : -n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest2Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, sampler " + "samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest3Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, " + "texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven2Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> " + "plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).rg);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven3Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> " + "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint2Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> " + "plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint3Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> " + "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, " + "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, " + "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, " + "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, " + "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, " + "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, " + "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, " + "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane: + statement("template<typename T, typename... LodOptions>"); + statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, " + "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUFullRange: + statement("template<typename T>"); + statement("inline vec<T, 4> spvExpandITUFullRange(vec<T, 4> ycbcr, int n)"); + begin_scope(); + statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUNarrowRange: + statement("template<typename T>"); + statement("inline vec<T, 4> spvExpandITUNarrowRange(vec<T, 4> ycbcr, int n)"); + begin_scope(); + statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);"); + statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT709: + statement("// cf. Khronos Data Format Specification, section 15.1.1"); + statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, " + "-0.33480248/0.7152, 0}};"); + statement(""); + statement("template<typename T>"); + statement("inline vec<T, 4> spvConvertYCbCrBT709(vec<T, 4> ycbcr)"); + begin_scope(); + statement("vec<T, 4> rgba;"); + statement("rgba.rgb = vec<T, 3>(spvBT709Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT601: + statement("// cf. Khronos Data Format Specification, section 15.1.2"); + statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, " + "-0.419198/0.587, 0}};"); + statement(""); + statement("template<typename T>"); + statement("inline vec<T, 4> spvConvertYCbCrBT601(vec<T, 4> ycbcr)"); + begin_scope(); + statement("vec<T, 4> rgba;"); + statement("rgba.rgb = vec<T, 3>(spvBT601Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT2020: + statement("// cf. Khronos Data Format Specification, section 15.1.3"); + statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, " + "-0.38737742/0.6780, 0}};"); + statement(""); + statement("template<typename T>"); + statement("inline vec<T, 4> spvConvertYCbCrBT2020(vec<T, 4> ycbcr)"); + begin_scope(); + statement("vec<T, 4> rgba;"); + statement("rgba.rgb = vec<T, 3>(spvBT2020Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplDynamicImageSampler: + statement("enum class spvFormatResolution"); + begin_scope(); + statement("_444 = 0,"); + statement("_422,"); + statement("_420"); + end_scope_decl(); + statement(""); + statement("enum class spvChromaFilter"); + begin_scope(); + statement("nearest = 0,"); + statement("linear"); + end_scope_decl(); + statement(""); + statement("enum class spvXChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrModelConversion"); + begin_scope(); + statement("rgb_identity = 0,"); + statement("ycbcr_identity,"); + statement("ycbcr_bt_709,"); + statement("ycbcr_bt_601,"); + statement("ycbcr_bt_2020"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrRange"); + begin_scope(); + statement("itu_full = 0,"); + statement("itu_narrow"); + end_scope_decl(); + statement(""); + statement("struct spvComponentBits"); + begin_scope(); + statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}"); + statement("uchar value : 6;"); + end_scope_decl(); + statement("// A class corresponding to metal::sampler which holds sampler"); + statement("// Y'CbCr conversion info."); + statement("struct spvYCbCrSampler"); + begin_scope(); + statement("constexpr spvYCbCrSampler() thread : val(build()) {}"); + statement("template<typename... Ts>"); + statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}"); + statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;"); + statement(""); + statement("spvFormatResolution get_resolution() const thread"); + begin_scope(); + statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);"); + end_scope(); + statement("spvChromaFilter get_chroma_filter() const thread"); + begin_scope(); + statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);"); + end_scope(); + statement("spvXChromaLocation get_x_chroma_offset() const thread"); + begin_scope(); + statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);"); + end_scope(); + statement("spvYChromaLocation get_y_chroma_offset() const thread"); + begin_scope(); + statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);"); + end_scope(); + statement("spvYCbCrModelConversion get_ycbcr_model() const thread"); + begin_scope(); + statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);"); + end_scope(); + statement("spvYCbCrRange get_ycbcr_range() const thread"); + begin_scope(); + statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);"); + end_scope(); + statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }"); + statement(""); + statement("private:"); + statement("ushort val;"); + statement(""); + statement("constexpr static constant ushort resolution_bits = 2;"); + statement("constexpr static constant ushort chroma_filter_bits = 2;"); + statement("constexpr static constant ushort x_chroma_off_bit = 1;"); + statement("constexpr static constant ushort y_chroma_off_bit = 1;"); + statement("constexpr static constant ushort ycbcr_model_bits = 3;"); + statement("constexpr static constant ushort ycbcr_range_bit = 1;"); + statement("constexpr static constant ushort bpc_bits = 6;"); + statement(""); + statement("constexpr static constant ushort resolution_base = 0;"); + statement("constexpr static constant ushort chroma_filter_base = 2;"); + statement("constexpr static constant ushort x_chroma_off_base = 4;"); + statement("constexpr static constant ushort y_chroma_off_base = 5;"); + statement("constexpr static constant ushort ycbcr_model_base = 6;"); + statement("constexpr static constant ushort ycbcr_range_base = 9;"); + statement("constexpr static constant ushort bpc_base = 10;"); + statement(""); + statement( + "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;"); + statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << " + "chroma_filter_base;"); + statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << " + "x_chroma_off_base;"); + statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << " + "y_chroma_off_base;"); + statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << " + "ycbcr_model_base;"); + statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << " + "ycbcr_range_base;"); + statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;"); + statement(""); + statement("static constexpr ushort build()"); + begin_scope(); + statement("return 0;"); + end_scope(); + statement(""); + statement("template<typename... Ts>"); + statement("static constexpr ushort build(spvFormatResolution res, Ts... t)"); + begin_scope(); + statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);"); + end_scope(); + statement(""); + statement("template<typename... Ts>"); + statement("static constexpr ushort build(spvChromaFilter filt, Ts... t)"); + begin_scope(); + statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);"); + end_scope(); + statement(""); + statement("template<typename... Ts>"); + statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template<typename... Ts>"); + statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template<typename... Ts>"); + statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)"); + begin_scope(); + statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);"); + end_scope(); + statement(""); + statement("template<typename... Ts>"); + statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)"); + begin_scope(); + statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);"); + end_scope(); + statement(""); + statement("template<typename... Ts>"); + statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)"); + begin_scope(); + statement("return (ushort(bpc.value) << bpc_base) | (build(t...) & ~bpc_mask);"); + end_scope(); + end_scope_decl(); + statement(""); + statement("// A class which can hold up to three textures and a sampler, including"); + statement("// Y'CbCr conversion info, used to pass combined image-samplers"); + statement("// dynamically to functions."); + statement("template<typename T>"); + statement("struct spvDynamicImageSampler"); + begin_scope(); + statement("texture2d<T> plane0;"); + statement("texture2d<T> plane1;"); + statement("texture2d<T> plane2;"); + statement("sampler samp;"); + statement("spvYCbCrSampler ycbcr_samp;"); + statement("uint swizzle = 0;"); + statement(""); + if (msl_options.swizzle_texture_samples) + { + statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, uint sw) thread :"); + statement(" plane0(tex), samp(samp), swizzle(sw) {}"); + } + else + { + statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp) thread :"); + statement(" plane0(tex), samp(samp) {}"); + } + statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, spvYCbCrSampler ycbcr_samp, " + "uint sw) thread :"); + statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement("constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement( + "constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1, texture2d<T> plane2,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), " + "swizzle(sw) {}"); + statement(""); + // XXX This is really hard to follow... I've left comments to make it a bit easier. + statement("template<typename... LodOptions>"); + statement("vec<T, 4> do_sample(float2 coord, LodOptions... options) const thread"); + begin_scope(); + statement("if (!is_null_texture(plane1))"); + begin_scope(); + statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||"); + statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)"); + begin_scope(); + statement("if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,"); + statement(" spvForward<LodOptions>(options)...);"); + statement( + "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward<LodOptions>(options)...);"); + end_scope(); // if (resolution == 422 || chroma_filter == nearest) + statement("switch (ycbcr_samp.get_resolution())"); + begin_scope(); + statement("case spvFormatResolution::_444: break;"); + statement("case spvFormatResolution::_422:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward<LodOptions>(options)...);"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward<LodOptions>(options)...);"); + statement("case spvXChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward<LodOptions>(options)...);"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward<LodOptions>(options)...);"); + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 422: + statement("case spvFormatResolution::_420:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward<LodOptions>(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward<LodOptions>(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward<LodOptions>(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward<LodOptions>(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::cosited_even: + statement("case spvXChromaLocation::midpoint:"); + begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward<LodOptions>(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward<LodOptions>(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward<LodOptions>(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward<LodOptions>(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::midpoint + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 420: + end_scope(); // switch (resolution) + end_scope(); // if (multiplanar) + statement("return plane0.sample(samp, coord, spvForward<LodOptions>(options)...);"); + end_scope(); // do_sample() + statement("template <typename... LodOptions>"); + statement("vec<T, 4> sample(float2 coord, LodOptions... options) const thread"); + begin_scope(); + statement( + "vec<T, 4> s = spvTextureSwizzle(do_sample(coord, spvForward<LodOptions>(options)...), swizzle);"); + statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)"); + statement(" return s;"); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_range())"); + begin_scope(); + statement("case spvYCbCrRange::itu_full:"); + statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + statement("case spvYCbCrRange::itu_narrow:"); + statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + end_scope(); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_model())"); + begin_scope(); + statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning + statement("case spvYCbCrModelConversion::ycbcr_identity:"); + statement(" return s;"); + statement("case spvYCbCrModelConversion::ycbcr_bt_709:"); + statement(" return spvConvertYCbCrBT709(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_601:"); + statement(" return spvConvertYCbCrBT601(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_2020:"); + statement(" return spvConvertYCbCrBT2020(s);"); + end_scope(); + end_scope(); + statement(""); + // Sampler Y'CbCr conversion forbids offsets. + statement("vec<T, 4> sample(float2 coord, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, offset);"); + end_scope(); + statement("template<typename lod_options>"); + statement("vec<T, 4> sample(float2 coord, lod_options options, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, options, offset);"); + end_scope(); + statement("#if __HAVE_MIN_LOD_CLAMP__"); + statement("vec<T, 4> sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, b, min_lod, offset);"); + end_scope(); + statement( + "vec<T, 4> sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, grad, min_lod, offset);"); + end_scope(); + statement("#endif"); + statement(""); + // Y'CbCr conversion forbids all operations but sampling. + statement("vec<T, 4> read(uint2 coord, uint lod = 0) const thread"); + begin_scope(); + statement("return plane0.read(coord, lod);"); + end_scope(); + statement(""); + statement("vec<T, 4> gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);"); + else + statement("return plane0.gather(samp, coord, offset, c);"); + end_scope(); + end_scope_decl(); + statement(""); default: break; @@ -2938,10 +4567,27 @@ void CompilerMSL::emit_resources() void CompilerMSL::emit_specialization_constants_and_structs() { SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); bool emitted = false; unordered_set<uint32_t> declared_structs; + unordered_set<uint32_t> aligned_structs; + + // First, we need to deal with scalar block layout. + // It is possible that a struct may have to be placed at an alignment which does not match the innate alignment of the struct itself. + // In that case, if such a case exists for a struct, we must force that all elements of the struct become packed_ types. + // This makes the struct alignment as small as physically possible. + // When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types. + ir.for_each_typed_id<SPIRType>([&](uint32_t type_id, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + mark_scalar_layout_structs(type); + }); + + // Very particular use of the soft loop lock. + // align_struct may need to create custom types on the fly, but we don't care about + // these types for purpose of iterating over them in ir.ids_for_type and friends. + auto loop_lock = ir.create_loop_soft_lock(); for (auto &id_ : ir.ids_for_constant_or_type) { @@ -3015,7 +4661,7 @@ void CompilerMSL::emit_specialization_constants_and_structs() // Output non-builtin interface structs. These include local function structs // and structs nested within uniform and read-write buffers. auto &type = id.get<SPIRType>(); - uint32_t type_id = type.self; + TypeID type_id = type.self; bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty(); bool is_block = @@ -3043,8 +4689,8 @@ void CompilerMSL::emit_specialization_constants_and_structs() declared_structs.insert(type_id); - if (has_extended_decoration(type_id, SPIRVCrossDecorationPacked)) - align_struct(type); + if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + align_struct(type, aligned_structs); // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. emit_struct(get<SPIRType>(type_id)); @@ -3154,7 +4800,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l } else if (is_array(mbr_type)) { - for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++) + for (uint32_t k = 0; k < to_array_size_literal(mbr_type, 0); k++, index++) { set<SPIRConstant>(const_mbr_id, type_id, index, false); auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, @@ -3183,7 +4829,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l else // Must be an array { assert(is_array(*type)); - for (uint32_t j = 0; j < type->array[0]; j++, index++) + for (uint32_t j = 0; j < to_array_size_literal(*type, 0); j++, index++) { set<SPIRConstant>(const_mbr_id, type_id, index, false); auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); @@ -3230,9 +4876,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // Mark the result as being packed if necessary. if (meta.storage_is_packed) - set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(ops[1], DecorationInvariant); @@ -3251,7 +4897,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // expression so we don't try to dereference it as a variable pointer. // Don't do this if the index is a constant 1, though. We need to drop stores // to that one. - auto *m = ir.find_meta(var ? var->self : 0); + auto *m = ir.find_meta(var ? var->self : ID(0)); if (get_execution_model() == ExecutionModelTessellationControl && var && m && m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles)) { @@ -3436,21 +5082,36 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) // Bitfield case OpBitFieldInsert: - MSL_QFOP(insert_bits); + { + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt); break; + } case OpBitFieldSExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type, + SPIRType::UInt, SPIRType::UInt); + break; + } + case OpBitFieldUExtract: - MSL_TFOP(extract_bits); + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type, + SPIRType::UInt, SPIRType::UInt); break; + } case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. MSL_UFOP(reverse_bits); break; case OpBitCount: - MSL_UFOP(popcount); + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype); break; + } case OpFRem: MSL_BFOP(fmod); @@ -3628,11 +5289,11 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) auto store_type = texel_type; store_type.vecsize = 4; - statement(join( - to_expression(img_id), ".write(", remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), - ", ", - to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, &forward), - ");")); + statement(join(to_expression(img_id), ".write(", + remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ", + to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, + 0, &forward), + ");")); if (p_var && variable_storage_is_aliased(*p_var)) flush_all_aliased_variables(); @@ -3686,7 +5347,30 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } case OpImageQueryLod: - SPIRV_CROSS_THROW("MSL does not support textureQueryLod()."); + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("ImageQueryLod is only supported on MSL 2.2 and up."); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t image_id = ops[2]; + uint32_t coord_id = ops[3]; + emit_uninitialized_temporary_expression(result_type, id); + + auto sampler_expr = to_sampler_expression(image_id); + auto *combined = maybe_get<SPIRCombinedImageSampler>(image_id); + auto image_expr = combined ? to_expression(combined->image) : to_expression(image_id); + + // TODO: It is unclear if calculcate_clamped_lod also conditionally rounds + // the reported LOD based on the sampler. NEAREST miplevel should + // round the LOD, but LINEAR miplevel should not round. + // Let's hope this does not become an issue ... + statement(to_expression(id), ".x = ", image_expr, ".calculate_clamped_lod(", sampler_expr, ", ", + to_expression(coord_id), ");"); + statement(to_expression(id), ".y = ", image_expr, ".calculate_unclamped_lod(", sampler_expr, ", ", + to_expression(coord_id), ");"); + register_control_dependent_expression(id); + break; + } #define MSL_ImgQry(qrytype) \ do \ @@ -3723,10 +5407,14 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } else { - auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); auto *var = maybe_get_backing_variable(ops[2]); + SPIRExpression *e; + if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler)) + e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true); + else + e = &emit_op(result_type, id, to_expression(ops[2]), true, true); if (var) - e.loaded_from = var->self; + e->loaded_from = var->self; } break; } @@ -3801,36 +5489,31 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) emit_barrier(ops[0], ops[1], ops[2]); break; - case OpVectorTimesMatrix: - case OpMatrixTimesVector: + case OpOuterProduct: { - // If the matrix needs transpose and it is square or packed, just flip the multiply order. - uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3]; - auto *e = maybe_get<SPIRExpression>(mtx_id); - auto &t = expression_type(mtx_id); - bool is_packed = has_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); - if (e && e->need_transpose && (t.columns == t.vecsize || is_packed)) - { - e->need_transpose = false; - - // This is important for matrices. Packed matrices - // are generally transposed, so unpacking using a constructor argument - // will result in an error. - // The simplest solution for now is to just avoid unpacking the matrix in this operation. - unset_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; - emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); - if (is_packed) - set_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); - e->need_transpose = true; + auto &type = get<SPIRType>(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; } - else - MSL_BOP(*); + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); break; } - // OpOuterProduct - case OpIAddCarry: case OpISubBorrow: { @@ -3838,10 +5521,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get<SPIRType>(result_type); - statement(variable_decl(type, to_name(result_id)), ";"); - set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + emit_uninitialized_temporary_expression(result_type, result_id); auto &res_type = get<SPIRType>(type.member_types[1]); if (opcode == OpIAddCarry) @@ -3870,10 +5551,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get<SPIRType>(result_type); - statement(variable_decl(type, to_name(result_id)), ";"); - set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + emit_uninitialized_temporary_expression(result_type, result_id); statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " * ", to_enclosed_expression(op1), ";"); @@ -3882,6 +5561,91 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpArrayLength: + { + auto &type = expression_type(ops[2]); + uint32_t offset = type_struct_member_offset(type, ops[3]); + uint32_t stride = type_struct_member_array_stride(type, ops[3]); + + auto expr = join("(", to_buffer_size_expression(ops[2]), " - ", offset, ") / ", stride); + emit_op(ops[0], ops[1], expr, true); + break; + } + + // SPV_INTEL_shader_integer_functions2 + case OpUCountLeadingZerosINTEL: + MSL_UFOP(clz); + break; + + case OpUCountTrailingZerosINTEL: + MSL_UFOP(ctz); + break; + + case OpAbsISubINTEL: + case OpAbsUSubINTEL: + MSL_BFOP(absdiff); + break; + + case OpIAddSatINTEL: + case OpUAddSatINTEL: + MSL_BFOP(addsat); + break; + + case OpIAverageINTEL: + case OpUAverageINTEL: + MSL_BFOP(hadd); + break; + + case OpIAverageRoundedINTEL: + case OpUAverageRoundedINTEL: + MSL_BFOP(rhadd); + break; + + case OpISubSatINTEL: + case OpUSubSatINTEL: + MSL_BFOP(subsat); + break; + + case OpIMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("int(short(", to_expression(a), ")) * int(short(", to_expression(b), "))"), + forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpUMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("uint(ushort(", to_expression(a), ")) * uint(ushort(", to_expression(b), "))"), + forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpIsHelperInvocationEXT: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("simd_is_helper_thread() is only supported on macOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + emit_op(ops[0], ops[1], "simd_is_helper_thread()", false); + break; + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Raster order groups require MSL 2.0."); + break; // Nothing to do in the body + default: CompilerGLSL::emit_instruction(instruction); break; @@ -3895,33 +5659,70 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl) return; - string bar_stmt = "threadgroup_barrier(mem_flags::"; + uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : uint32_t(ScopeInvocation); + uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : uint32_t(ScopeInvocation); + // Use the wider of the two scopes (smaller value) + exe_scope = min(exe_scope, mem_scope); + + string bar_stmt; + if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2)) + bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier"; + else + bar_stmt = "threadgroup_barrier"; + bar_stmt += "("; uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone); - if (get_execution_model() == ExecutionModelTessellationControl) + // Use the | operator to combine flags if we can. + if (msl_options.supports_msl_version(1, 2)) + { + string mem_flags = ""; // For tesc shaders, this also affects objects in the Output storage class. // Since in Metal, these are placed in a device buffer, we have to sync device memory here. - bar_stmt += "mem_device"; - else if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask) - bar_stmt += "mem_device"; - else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask)) - bar_stmt += "mem_threadgroup"; - else if (mem_sem & MemorySemanticsImageMemoryMask) - bar_stmt += "mem_texture"; + if (get_execution_model() == ExecutionModelTessellationControl || + (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))) + mem_flags += "mem_flags::mem_device"; + if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask)) + { + if (!mem_flags.empty()) + mem_flags += " | "; + mem_flags += "mem_flags::mem_threadgroup"; + } + if (mem_sem & MemorySemanticsImageMemoryMask) + { + if (!mem_flags.empty()) + mem_flags += " | "; + mem_flags += "mem_flags::mem_texture"; + } + + if (mem_flags.empty()) + mem_flags = "mem_flags::mem_none"; + + bar_stmt += mem_flags; + } else - bar_stmt += "mem_none"; + { + if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) && + (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask))) + bar_stmt += "mem_flags::mem_device_and_threadgroup"; + else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) + bar_stmt += "mem_flags::mem_device"; + else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask)) + bar_stmt += "mem_flags::mem_threadgroup"; + else if (mem_sem & MemorySemanticsImageMemoryMask) + bar_stmt += "mem_flags::mem_texture"; + else + bar_stmt += "mem_flags::mem_none"; + } if (msl_options.is_ios() && (msl_options.supports_msl_version(2) && !msl_options.supports_msl_version(2, 1))) { bar_stmt += ", "; - // Use the wider of the two scopes (smaller value) - uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : uint32_t(ScopeInvocation); - uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : uint32_t(ScopeInvocation); - uint32_t scope = min(exe_scope, mem_scope); - switch (scope) + switch (mem_scope) { case ScopeCrossDevice: case ScopeDevice: @@ -3949,7 +5750,8 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin flush_all_active_variables(); } -void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage, + StorageClass rhs_storage) { // Assignment from an array initializer is fine. auto &type = expression_type(rhs_id); @@ -3977,21 +5779,32 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id) if (type.array.size() > SPVFuncImplArrayCopyMultidimMax) SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); auto func = static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size()); - if (spv_function_implementations.count(func) == 0) - { - spv_function_implementations.insert(func); - suppress_missing_prototypes = true; - force_recompile(); - } - } - else if (spv_function_implementations.count(SPVFuncImplArrayCopy) == 0) - { - spv_function_implementations.insert(SPVFuncImplArrayCopy); - suppress_missing_prototypes = true; - force_recompile(); + add_spv_func_and_recompile(func); } + else + add_spv_func_and_recompile(SPVFuncImplArrayCopy); + + bool lhs_thread = lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction || + lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate; + bool rhs_thread = rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction || + rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate; + + const char *tag = nullptr; + if (lhs_thread && is_constant) + tag = "FromConstantToStack"; + else if (lhs_storage == StorageClassWorkgroup && is_constant) + tag = "FromConstantToThreadGroup"; + else if (lhs_thread && rhs_thread) + tag = "FromStackToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_thread) + tag = "FromStackToThreadGroup"; + else if (lhs_thread && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToThreadGroup"; + else + SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); - const char *tag = is_constant ? "FromConstant" : "FromStack"; statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); } @@ -4028,7 +5841,8 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) if (p_v_lhs) flush_variable_declaration(p_v_lhs->self); - emit_array_copy(to_expression(id_lhs), id_rhs); + emit_array_copy(to_expression(id_lhs), id_rhs, get_backing_variable_storage(id_lhs), + get_backing_variable_storage(id_rhs)); register_write(id_lhs); return true; @@ -4039,12 +5853,10 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1, bool op1_is_pointer, bool op1_is_literal, uint32_t op2) { - forced_temporaries.insert(result_id); - string exp = string(op) + "("; auto &type = get_pointee_type(expression_type(obj)); - exp += "(volatile "; + exp += "("; auto *var = maybe_get_backing_variable(obj); if (!var) SPIRV_CROSS_THROW("No backing variable for atomic operation."); @@ -4079,12 +5891,11 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, // the CAS loop, otherwise it will loop infinitely, with the comparison test always failing. // The function updates the comparitor value from the memory value, so the additional // comparison test evaluates the memory value against the expected value. - statement(variable_decl(type, to_name(result_id)), ";"); + emit_uninitialized_temporary_expression(result_type, result_id); statement("do"); begin_scope(); statement(to_name(result_id), " = ", to_expression(op1), ";"); end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")")); - set<SPIRExpression>(result_id, to_name(result_id), result_type, true); } else { @@ -4138,12 +5949,20 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, emit_unary_func_op(result_type, id, args[0], "rint"); break; + case GLSLstd450FindILsb: + { + // In this template version of findLSB, we return T. + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype); + break; + } + case GLSLstd450FindSMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findSMSB", int_type, int_type); + emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type); break; case GLSLstd450FindUMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findUMSB", uint_type, uint_type); + emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type); break; case GLSLstd450PackSnorm4x8: @@ -4267,12 +6086,130 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier) // GLSLstd450InterpolateAtOffset + case GLSLstd450Distance: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Equivalent to length(a - b) -> abs(a - b). + emit_op(result_type, id, + join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"), + should_forward(args[0]) && should_forward(args[1])); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Length: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Equivalent to abs(). + emit_unary_func_op(result_type, id, args[0], "abs"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Normalize: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Returns -1 or 1 for valid input, sign() does the job. + emit_unary_func_op(result_type, id, args[0], "sign"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Reflect: + if (get<SPIRType>(result_type).vecsize == 1) + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Refract: + if (get<SPIRType>(result_type).vecsize == 1) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450FaceForward: + if (get<SPIRType>(result_type).vecsize == 1) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + // Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary. + auto *ptr = maybe_get<SPIRExpression>(args[1]); + if (ptr && ptr->access_chain && is_scalar(expression_type(args[1]))) + { + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + + // Need to create temporaries and copy over to access chain after. + // We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ... + uint32_t &tmp_id = extra_sub_expressions[id]; + if (!tmp_id) + tmp_id = ir.increase_bound_by(1); + + uint32_t tmp_type_id = get_pointee_type_id(ptr->expression_type); + emit_uninitialized_temporary_expression(tmp_type_id, tmp_id); + emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? "modf" : "frexp"); + statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; } } +void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t count) +{ + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1."); + + auto op = static_cast<AMDShaderTrinaryMinMax>(eop); + + switch (op) + { + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3"); + break; + default: + CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count); + break; + } +} + // Emit a structure declaration for the specified interface variable. void CompilerMSL::emit_interface_block(uint32_t ib_var_id) { @@ -4293,9 +6230,10 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) add_function_overload(func); local_variable_names = resource_names; - string decl; - processing_entry_point = (func.self == ir.default_entry_point); + processing_entry_point = func.self == ir.default_entry_point; + + string decl = processing_entry_point ? "" : "inline "; auto &type = get<SPIRType>(func.return_type); @@ -4306,7 +6244,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) else { // We cannot return arrays in MSL, so "return" through an out variable. - decl = "void"; + decl += "void"; } decl += " "; @@ -4338,7 +6276,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) for (auto var_id : vars_needing_early_declaration) { auto &ed_var = get<SPIRVariable>(var_id); - uint32_t &initializer = ed_var.initializer; + ID &initializer = ed_var.initializer; if (!initializer) initializer = ir.increase_bound_by(1); @@ -4367,16 +6305,36 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) decl += argument_decl(arg); - // Manufacture automatic sampler arg for SampledImage texture + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + auto &arg_type = get<SPIRType>(arg.type); - if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer) - decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id)); + if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (auto *constexpr_sampler = find_constexpr_sampler(name_id)) + if (constexpr_sampler->ycbcr_conversion_enable) + planes = constexpr_sampler->planes; + for (uint32_t i = 1; i < planes; i++) + decl += join(", ", argument_decl(arg), plane_name_suffix, i); + + // Manufacture automatic sampler arg for SampledImage texture + if (arg_type.image.dim != DimBuffer) + decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id)); + } // Manufacture automatic swizzle arg. - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type)) + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) && + !is_dynamic_img_sampler) { bool arg_is_array = !arg_type.array.empty(); - decl += join(", constant uint32_t", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id)); + decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id)); + } + + if (buffers_requiring_array_length.count(name_id)) + { + bool arg_is_array = !arg_type.array.empty(); + decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id)); } if (&arg != &func.arguments.back()) @@ -4387,85 +6345,237 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) statement(decl); } +static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler) +{ + // For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images + // use implicit reconstruction. + return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1; +} + // Returns the texture sampling function string for the specified image and sampling characteristics. -string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, - bool has_offset, bool, bool has_dref, uint32_t) +string CompilerMSL::to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, + bool, bool, bool has_dref, uint32_t, uint32_t) { + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + // Special-case gather. We have to alter the component being looked up // in the swizzle case. - if (msl_options.swizzle_texture_samples && is_gather) + if (msl_options.swizzle_texture_samples && is_gather && !is_dynamic_img_sampler && + (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) { - string fname = imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; - fname += "<" + type_to_glsl(get<SPIRType>(imgtype.image.type)) + ", metal::" + type_to_glsl(imgtype); - // Add the arg types ourselves. Yes, this sucks, but Clang can't - // deduce template pack parameters in the middle of an argument list. - switch (imgtype.image.dim) - { - case Dim2D: - fname += ", float2"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - if (!imgtype.image.depth || has_offset) - fname += ", int2"; - break; - case DimCube: - fname += ", float3"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - break; - default: - SPIRV_CROSS_THROW("Invalid texture dimension for gather op."); - } - fname += ">"; - return fname; + add_spv_func_and_recompile(imgtype.image.depth ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle); + return imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; } auto *combined = maybe_get<SPIRCombinedImageSampler>(img); // Texture reference - string fname = to_expression(combined ? combined->image : img) + "."; - if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype)) - fname = "spvTextureSwizzle(" + fname; - - // Texture function and sampler - if (is_fetch) - fname += "read"; - else if (is_gather) - fname += "gather"; + string fname; + if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler) + { + if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3) + SPIRV_CROSS_THROW("Unhandled number of color image planes!"); + // 444 images aren't downsampled, so we don't need to do linear filtering. + if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 || + constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST) + { + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane); + fname = "spvChromaReconstructNearest"; + } + else // Linear with a downsampled format + { + fname = "spvChromaReconstructLinear"; + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + assert(false); + break; // not reached + case MSL_FORMAT_RESOLUTION_422: + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane); + fname += "422CositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane); + fname += "422Midpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid chroma location."); + } + break; + case MSL_FORMAT_RESOLUTION_420: + fname += "420"; + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane); + fname += "XCositedEvenYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane); + fname += "XCositedEvenYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane); + fname += "XMidpointYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane); + fname += "XMidpointYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid X chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + } + } else - fname += "sample"; + { + fname = to_expression(combined ? combined->image : img) + "."; - if (has_dref) - fname += "_compare"; + // Texture function and sampler + if (is_fetch) + fname += "read"; + else if (is_gather) + fname += "gather"; + else + fname += "sample"; + + if (has_dref) + fname += "_compare"; + } return fname; } +string CompilerMSL::convert_to_f32(const string &expr, uint32_t components) +{ + SPIRType t; + t.basetype = SPIRType::Float; + t.vecsize = components; + t.columns = 1; + return join(type_to_glsl_constructor(t), "(", expr, ")"); +} + +static inline bool sampling_type_needs_f32_conversion(const SPIRType &type) +{ + // Double is not supported to begin with, but doesn't hurt to check for completion. + return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double; +} + // Returns the function args for a texture sampling function for the specified image and sampling characteristics. -string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y, - uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, - uint32_t sample, bool *p_forward) +string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + bool is_proj, uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, + uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, + uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) { + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + string farg_str; - if (!is_fetch) - farg_str += to_sampler_expression(img); + bool forward = true; - if (msl_options.swizzle_texture_samples && is_gather) + if (!is_dynamic_img_sampler) { - if (!farg_str.empty()) - farg_str += ", "; + // Texture reference (for some cases) + if (needs_chroma_reconstruction(constexpr_sampler)) + { + // Multiplanar images need two or three textures. + farg_str += to_expression(img); + for (uint32_t i = 1; i < constexpr_sampler->planes; i++) + farg_str += join(", ", to_expression(img), plane_name_suffix, i); + } + else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && is_gather) + { + auto *combined = maybe_get<SPIRCombinedImageSampler>(img); + farg_str += to_expression(combined ? combined->image : img); + } - auto *combined = maybe_get<SPIRCombinedImageSampler>(img); - farg_str += to_expression(combined ? combined->image : img); + // Sampler reference + if (!is_fetch) + { + if (!farg_str.empty()) + farg_str += ", "; + farg_str += to_sampler_expression(img); + } + + if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && is_gather) + { + // Add the swizzle constant from the swizzle buffer. + farg_str += ", " + to_swizzle_expression(img); + used_swizzle_buffer = true; + } + + // Swizzled gather puts the component before the other args, to allow template + // deduction to work. + if (comp && msl_options.swizzle_texture_samples) + { + forward = should_forward(comp); + farg_str += ", " + to_component_argument(comp); + } } // Texture coordinates - bool forward = should_forward(coord); + forward = forward && should_forward(coord); auto coord_expr = to_enclosed_expression(coord); auto &coord_type = expression_type(coord); bool coord_is_fp = type_is_floating_point(coord_type); @@ -4483,6 +6593,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (is_fetch) tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 1); alt_coord_component = 1; break; @@ -4518,6 +6630,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (is_fetch) tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 2); alt_coord_component = 2; break; @@ -4528,6 +6642,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (is_fetch) tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 3); alt_coord_component = 3; break; @@ -4545,6 +6661,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool tex_coords = enclose_expression(tex_coords) + ".xyz"; } + if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 3); + alt_coord_component = 3; break; @@ -4575,7 +6694,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool // If projection, use alt coord as divisor if (is_proj) - tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component); + { + if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1); + else + tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component); + } if (!farg_str.empty()) farg_str += ", "; @@ -4609,11 +6733,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool forward = forward && should_forward(dref); farg_str += ", "; + auto &dref_type = expression_type(dref); + + string dref_expr; if (is_proj) - farg_str += - to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component); + dref_expr = + join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component)); else - farg_str += to_expression(dref); + dref_expr = to_expression(dref); + + if (sampling_type_needs_f32_conversion(dref_type)) + dref_expr = convert_to_f32(dref_expr, 1); + + farg_str += dref_expr; if (msl_options.is_macos() && (grad_x || grad_y)) { @@ -4706,6 +6838,20 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")"; } + if (minlod) + { + if (msl_options.is_macos()) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up on macOS."); + } + else if (msl_options.is_ios()) + SPIRV_CROSS_THROW("min_lod_clamp() is not supported on iOS."); + + forward = forward && should_forward(minlod); + farg_str += ", min_lod_clamp(" + to_expression(minlod) + ")"; + } + // Add offsets string offset_expr; if (coffset && !is_fetch) @@ -4748,25 +6894,20 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (imgtype.image.dim == Dim2D && offset_expr.empty()) farg_str += ", int2(0)"; - forward = forward && should_forward(comp); - farg_str += ", " + to_component_argument(comp); + if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler) + { + forward = forward && should_forward(comp); + farg_str += ", " + to_component_argument(comp); + } } if (sample) { + forward = forward && should_forward(sample); farg_str += ", "; farg_str += to_expression(sample); } - if (msl_options.swizzle_texture_samples && is_sampled_image_type(imgtype)) - { - // Add the swizzle constant from the swizzle buffer. - if (!is_gather) - farg_str += ")"; - farg_str += ", " + to_swizzle_expression(img); - used_aux_buffer = true; - } - *p_forward = forward; return farg_str; @@ -4813,12 +6954,216 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id); } +string CompilerMSL::to_texture_op(const Instruction &i, bool *forward, SmallVector<uint32_t> &inherited_expressions) +{ + auto *ops = stream(i); + uint32_t result_type_id = ops[0]; + uint32_t img = ops[2]; + auto &result_type = get<SPIRType>(result_type_id); + auto op = static_cast<Op>(i.op); + bool is_gather = (op == OpImageGather || op == OpImageDrefGather); + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get<SPIRType>(type.self); + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + + string expr; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + // If this needs sampler Y'CbCr conversion, we need to do some additional + // processing. + switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709); + expr += "spvConvertYCbCrBT709("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601); + expr += "spvConvertYCbCrBT601("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020); + expr += "spvConvertYCbCrBT2020("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + switch (constexpr_sampler->ycbcr_range) + { + case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL: + add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange); + expr += "spvExpandITUFullRange("; + break; + case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW: + add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange); + expr += "spvExpandITUNarrowRange("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr range."); + } + } + } + else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + add_spv_func_and_recompile(SPVFuncImplTextureSwizzle); + expr += "spvTextureSwizzle("; + } + + string inner_expr = CompilerGLSL::to_texture_op(i, forward, inherited_expressions); + + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + if (!constexpr_sampler->swizzle_is_identity()) + { + static const char swizzle_names[] = "rgba"; + if (!constexpr_sampler->swizzle_has_one_or_zero()) + { + // If we can, do it inline. + expr += inner_expr + "."; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + } + } + else + { + // Otherwise, we need to emit a temporary and swizzle that. + uint32_t temp_id = ir.increase_bound_by(1); + emit_op(result_type_id, temp_id, inner_expr, false); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(temp_id, inherit); + inherited_expressions.clear(); + inherited_expressions.push_back(temp_id); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(temp_id); + break; + + default: + break; + } + expr += type_to_glsl(result_type) + "("; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += to_expression(temp_id) + "." + swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_ZERO: + expr += "0"; + break; + case MSL_COMPONENT_SWIZZLE_ONE: + expr += "1"; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += to_expression(temp_id) + "." + + swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + if (c < 3) + expr += ", "; + } + expr += ")"; + } + } + else + expr += inner_expr; + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + expr += join(", ", constexpr_sampler->bpc, ")"); + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) + expr += ")"; + } + } + else + { + expr += inner_expr; + if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + // Add the swizzle constant from the swizzle buffer. + expr += ", " + to_swizzle_expression(img) + ")"; + used_swizzle_buffer = true; + } + } + + return expr; +} + +static string create_swizzle(MSLComponentSwizzle swizzle) +{ + switch (swizzle) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + return "spvSwizzle::none"; + case MSL_COMPONENT_SWIZZLE_ZERO: + return "spvSwizzle::zero"; + case MSL_COMPONENT_SWIZZLE_ONE: + return "spvSwizzle::one"; + case MSL_COMPONENT_SWIZZLE_R: + return "spvSwizzle::red"; + case MSL_COMPONENT_SWIZZLE_G: + return "spvSwizzle::green"; + case MSL_COMPONENT_SWIZZLE_B: + return "spvSwizzle::blue"; + case MSL_COMPONENT_SWIZZLE_A: + return "spvSwizzle::alpha"; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + return ""; + } +} + // Returns a string representation of the ID, usable as a function arg. // Manufacture automatic sampler arg for SampledImage texture. -string CompilerMSL::to_func_call_arg(uint32_t id) +string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { string arg_str; + auto &type = expression_type(id); + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + // If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around. + bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler); + if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler) + arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">("); + auto *c = maybe_get<SPIRConstant>(id); if (c && !get<SPIRType>(c->constant_type).array.empty()) { @@ -4833,7 +7178,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id) // so just create a thread local copy in the current function. arg_str = join("_", id, "_array_copy"); auto &constants = current_function->constant_arrays_needed_on_stack; - auto itr = find(begin(constants), end(constants), id); + auto itr = find(begin(constants), end(constants), ID(id)); if (itr == end(constants)) { force_recompile(); @@ -4841,22 +7186,106 @@ string CompilerMSL::to_func_call_arg(uint32_t id) } } else - arg_str = CompilerGLSL::to_func_call_arg(id); + arg_str += CompilerGLSL::to_func_call_arg(arg, id); - // Manufacture automatic sampler arg if the arg is a SampledImage texture. - auto &type = expression_type(id); - if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + if (!arg_is_dynamic_img_sampler) { // Need to check the base variable in case we need to apply a qualified alias. uint32_t var_id = 0; - auto *sampler_var = maybe_get<SPIRVariable>(id); - if (sampler_var) - var_id = sampler_var->basevariable; + auto *var = maybe_get<SPIRVariable>(id); + if (var) + var_id = var->basevariable; - arg_str += ", " + to_sampler_expression(var_id ? var_id : id); + auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id); + if (type.basetype == SPIRType::SampledImage) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + planes = constexpr_sampler->planes; + // If this parameter isn't aliasing a global, then we need to use + // the special "dynamic image-sampler" class to pass it--and we need + // to use it for *every* non-alias parameter, in case a combined + // image-sampler with a Y'CbCr conversion is passed. Hopefully, this + // pathological case is so rare that it should never be hit in practice. + if (!arg.alias_global_variable) + add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler); + } + for (uint32_t i = 1; i < planes; i++) + arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i); + // Manufacture automatic sampler arg if the arg is a SampledImage texture. + if (type.image.dim != DimBuffer) + arg_str += ", " + to_sampler_expression(var_id ? var_id : id); + + // Add sampler Y'CbCr conversion info if we have it + if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + SmallVector<string> samp_args; + + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + // Default + break; + case MSL_FORMAT_RESOLUTION_422: + samp_args.push_back("spvFormatResolution::_422"); + break; + case MSL_FORMAT_RESOLUTION_420: + samp_args.push_back("spvFormatResolution::_420"); + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + + if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST) + samp_args.push_back("spvChromaFilter::linear"); + + if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvXChromaLocation::midpoint"); + if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvYChromaLocation::midpoint"); + switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY: + // Default + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020"); + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL) + samp_args.push_back("spvYCbCrRange::itu_narrow"); + samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")")); + arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")"); + } + } + + if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(", + create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(", + create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(", + create_swizzle(constexpr_sampler->swizzle[0]), ")"); + else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + arg_str += ", " + to_swizzle_expression(var_id ? var_id : id); + + if (buffers_requiring_array_length.count(var_id)) + arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id); + + if (is_dynamic_img_sampler) + arg_str += ")"; } - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) - arg_str += ", " + to_swizzle_expression(id); return arg_str; } @@ -4867,7 +7296,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id) string CompilerMSL::to_sampler_expression(uint32_t id) { auto *combined = maybe_get<SPIRCombinedImageSampler>(id); - auto expr = to_expression(combined ? combined->image : id); + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); uint32_t samp_id = 0; @@ -4887,9 +7316,15 @@ string CompilerMSL::to_sampler_expression(uint32_t id) string CompilerMSL::to_swizzle_expression(uint32_t id) { auto *combined = maybe_get<SPIRCombinedImageSampler>(id); - auto expr = to_expression(combined ? combined->image : id); + + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); + // If an image is part of an argument buffer translate this to a legal identifier. + for (auto &c : expr) + if (c == '.') + c = '_'; + if (index == string::npos) return expr + swizzle_name_suffix; else @@ -4900,6 +7335,32 @@ string CompilerMSL::to_swizzle_expression(uint32_t id) } } +string CompilerMSL::to_buffer_size_expression(uint32_t id) +{ + auto expr = to_expression(id); + auto index = expr.find_first_of('['); + + // This is quite crude, but we need to translate the reference name (*spvDescriptorSetN.name) to + // the pointer expression spvDescriptorSetN.name to make a reasonable expression here. + // This only happens if we have argument buffers and we are using OpArrayLength on a lone SSBO in that set. + if (expr.size() >= 3 && expr[0] == '(' && expr[1] == '*') + expr = address_of_expression(expr); + + // If a buffer is part of an argument buffer translate this to a legal identifier. + for (auto &c : expr) + if (c == '.') + c = '_'; + + if (index == string::npos) + return expr + buffer_size_name_suffix; + else + { + auto buffer_expr = expr.substr(0, index); + auto array_expr = expr.substr(index); + return buffer_expr + buffer_size_name_suffix + array_expr; + } +} + // Checks whether the type is a Block all of whose members have DecorationPatch. bool CompilerMSL::is_patch_block(const SPIRType &type) { @@ -4918,91 +7379,33 @@ bool CompilerMSL::is_patch_block(const SPIRType &type) // Checks whether the ID is a row_major matrix that requires conversion before use bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id) { - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix) - return false; - - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_decoration(id, DecorationRowMajor)) - return false; - - // Generate a function that will swap matrix elements from row-major to column-major. - // Packed row-matrix should just use transpose() function. - if (!has_extended_decoration(id, SPIRVCrossDecorationPacked)) - { - const auto type = expression_type(id); - add_convert_row_major_matrix_function(type.columns, type.vecsize); - } - - return true; + auto *e = maybe_get<SPIRExpression>(id); + if (e) + return e->need_transpose; + else + return has_decoration(id, DecorationRowMajor); } // Checks whether the member is a row_major matrix that requires conversion before use bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) { - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix) - return false; - - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_member_decoration(type.self, index, DecorationRowMajor)) - return false; - - // Generate a function that will swap matrix elements from row-major to column-major. - // Packed row-matrix should just use transpose() function. - if (!has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked)) - { - const auto mbr_type = get<SPIRType>(type.member_types[index]); - add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize); - } - - return true; + return has_member_decoration(type.self, index, DecorationRowMajor); } -// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix. -void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows) +string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed) { - SPVFuncImpl spv_func; - if (cols == rows) // Square matrix...just use transpose() function - return; - else if (cols == 2 && rows == 3) - spv_func = SPVFuncImplRowMajor2x3; - else if (cols == 2 && rows == 4) - spv_func = SPVFuncImplRowMajor2x4; - else if (cols == 3 && rows == 2) - spv_func = SPVFuncImplRowMajor3x2; - else if (cols == 3 && rows == 4) - spv_func = SPVFuncImplRowMajor3x4; - else if (cols == 4 && rows == 2) - spv_func = SPVFuncImplRowMajor4x2; - else if (cols == 4 && rows == 3) - spv_func = SPVFuncImplRowMajor4x3; - else - SPIRV_CROSS_THROW("Could not convert row-major matrix."); - - auto rslt = spv_function_implementations.insert(spv_func); - if (rslt.second) + if (!is_matrix(exp_type)) { - suppress_missing_prototypes = true; - force_recompile(); + return CompilerGLSL::convert_row_major_matrix(move(exp_str), exp_type, physical_type_id, is_packed); } -} - -// Wraps the expression string in a function call that converts the -// row_major matrix result of the expression to a column_major matrix. -string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed) -{ - strip_enclosed_expression(exp_str); - - string func_name; - - // Square and packed matrices can just use transpose - if (exp_type.columns == exp_type.vecsize || is_packed) - func_name = "transpose"; else - func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize); - - return join(func_name, "(", exp_str, ")"); + { + strip_enclosed_expression(exp_str); + if (physical_type_id != 0 || is_packed) + exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true); + return join("transpose(", exp_str, ")"); + } } // Called automatically at the end of the entry point function @@ -5025,55 +7428,66 @@ void CompilerMSL::emit_fixup() string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier) { - auto &membertype = get<SPIRType>(member_type_id); - - // If this member requires padding to maintain alignment, emit a dummy padding member. - MSLStructMemberKey key = get_struct_member_key(type.self, index); - uint32_t pad_len = struct_member_padding[key]; - if (pad_len > 0) - statement("char _m", index, "_pad", "[", to_string(pad_len), "];"); + if (member_is_remapped_physical_type(type, index)) + member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); + auto &physical_type = get<SPIRType>(member_type_id); // If this member is packed, mark it as so. - string pack_pfx = ""; - - const SPIRType *effective_membertype = &membertype; - SPIRType override_type; + string pack_pfx; uint32_t orig_id = 0; if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)) orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID); - if (member_is_packed_type(type, index)) + bool row_major = false; + if (is_matrix(physical_type)) + row_major = has_member_decoration(type.self, index, DecorationRowMajor); + + SPIRType row_major_physical_type; + const SPIRType *declared_type = &physical_type; + + if (member_is_packed_physical_type(type, index)) { // If we're packing a matrix, output an appropriate typedef - if (membertype.basetype == SPIRType::Struct) + if (physical_type.basetype == SPIRType::Struct) { - pack_pfx = "/* FIXME: A padded struct is needed here. If you see this message, file a bug! */ "; + SPIRV_CROSS_THROW("Cannot emit a packed struct currently."); } - else if (membertype.vecsize > 1 && membertype.columns > 1) + else if (is_matrix(physical_type)) { + uint32_t rows = physical_type.vecsize; + uint32_t cols = physical_type.columns; pack_pfx = "packed_"; - string base_type = membertype.width == 16 ? "half" : "float"; + if (row_major) + { + // These are stored transposed. + rows = physical_type.columns; + cols = physical_type.vecsize; + pack_pfx = "packed_rm_"; + } + string base_type = physical_type.width == 16 ? "half" : "float"; string td_line = "typedef "; - td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns); + td_line += "packed_" + base_type + to_string(rows); td_line += " " + pack_pfx; - td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize); + // Use the actual matrix size here. + td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize); + td_line += "[" + to_string(cols) + "]"; td_line += ";"; add_typedef_line(td_line); } - else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct) - { - // A "packed" float array, but we pad here instead to 4-vector. - override_type = membertype; - override_type.vecsize = 4; - effective_membertype = &override_type; - } else pack_pfx = "packed_"; } + else if (row_major) + { + // Need to declare type with flipped vecsize/columns. + row_major_physical_type = physical_type; + swap(row_major_physical_type.vecsize, row_major_physical_type.columns); + declared_type = &row_major_physical_type; + } // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS. - if (msl_options.is_ios() && membertype.basetype == SPIRType::Image && membertype.image.sampled == 2) + if (msl_options.is_ios() && physical_type.basetype == SPIRType::Image && physical_type.image.sampled == 2) { if (!has_decoration(orig_id, DecorationNonWritable)) SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS."); @@ -5081,13 +7495,13 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ // Array information is baked into these types. string array_type; - if (membertype.basetype != SPIRType::Image && membertype.basetype != SPIRType::Sampler && - membertype.basetype != SPIRType::SampledImage) + if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler && + physical_type.basetype != SPIRType::SampledImage) { - array_type = type_to_array_glsl(membertype); + array_type = type_to_array_glsl(physical_type); } - return join(pack_pfx, type_to_glsl(*effective_membertype, orig_id), " ", qualifier, to_member_name(type, index), + return join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";"); } @@ -5095,9 +7509,26 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t) { + // If this member requires padding to maintain its declared offset, emit a dummy padding member before it. + if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget)) + { + uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget); + statement("char _m", index, "_pad", "[", pad_len, "];"); + } + statement(to_struct_member(type, member_type_id, index, qualifier)); } +void CompilerMSL::emit_struct_padding_target(const SPIRType &type) +{ + uint32_t struct_size = get_declared_struct_size_msl(type, true, true); + uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget); + if (target_size < struct_size) + SPIRV_CROSS_THROW("Cannot pad with negative bytes."); + else if (target_size > struct_size) + statement("char _m0_final_padding[", target_size - struct_size, "];"); +} + // Return a MSL qualifier for the specified function attribute member string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index) { @@ -5109,9 +7540,16 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in BuiltIn builtin = BuiltInMax; bool is_builtin = is_member_builtin(type, index, &builtin); - if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID)) - return join(" [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID), - ")]]"); + if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) + { + string quals = join( + " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")"); + if (interlocked_resources.count( + get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))) + quals += ", raster_order_group(0)"; + quals += "]]"; + return quals; + } // Vertex function inputs if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput) @@ -5188,6 +7626,8 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { case BuiltInInvocationId: case BuiltInPrimitiveId: + case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage + case BuiltInSubgroupSize: // FIXME: Should work in any stage return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); case BuiltInPatchVertices: return ""; @@ -5239,18 +7679,25 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in // Fragment function inputs if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput) { - string quals = ""; + string quals; if (is_builtin) { switch (builtin) { + case BuiltInViewIndex: + if (!msl_options.multiview) + break; + /* fallthrough */ case BuiltInFrontFacing: case BuiltInPointCoord: case BuiltInFragCoord: case BuiltInSampleId: case BuiltInSampleMask: case BuiltInLayer: + case BuiltInBaryCoordNV: + case BuiltInBaryCoordNoPerspNV: quals = builtin_qualifier(builtin); + break; default: break; @@ -5268,6 +7715,20 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in quals = string("user(locn") + convert_to_string(locn) + ")"; } } + + if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV) + { + if (has_member_decoration(type.self, index, DecorationFlat) || + has_member_decoration(type.self, index, DecorationCentroid) || + has_member_decoration(type.self, index, DecorationSample) || + has_member_decoration(type.self, index, DecorationNoPerspective)) + { + // NoPerspective is baked into the builtin type. + SPIRV_CROSS_THROW( + "Flat, Centroid, Sample, NoPerspective decorations are not supported for BaryCoord inputs."); + } + } + // Don't bother decorating integers with the 'flat' attribute; it's // the default (in fact, the only option). Also don't bother with the // FragCoord builtin; it's always noperspective on Metal. @@ -5304,6 +7765,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in quals += "center_no_perspective"; } } + if (!quals.empty()) return " [[" + quals + "]]"; } @@ -5315,6 +7777,11 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { switch (builtin) { + case BuiltInFragStencilRefEXT: + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up."); + return string(" [[") + builtin_qualifier(builtin) + "]]"; + case BuiltInSampleMask: case BuiltInFragDepth: return string(" [[") + builtin_qualifier(builtin) + "]]"; @@ -5347,6 +7814,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInNumWorkgroups: case BuiltInLocalInvocationId: case BuiltInLocalInvocationIndex: + case BuiltInNumSubgroups: + case BuiltInSubgroupId: + case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage + case BuiltInSubgroupSize: // FIXME: Should work in any stage return string(" [[") + builtin_qualifier(builtin) + "]]"; default: @@ -5417,8 +7888,10 @@ string CompilerMSL::func_type_decl(SPIRType &type) execution.output_vertices, ") ]] vertex"); break; case ExecutionModelFragment: - entry_type = - execution.flags.get(ExecutionModeEarlyFragmentTests) ? "[[ early_fragment_tests ]] fragment" : "fragment"; + entry_type = execution.flags.get(ExecutionModeEarlyFragmentTests) || + execution.flags.get(ExecutionModePostDepthCoverage) ? + "[[ early_fragment_tests ]] fragment" : + "fragment"; break; case ExecutionModelTessellationControl: if (!msl_options.supports_msl_version(1, 2)) @@ -5442,21 +7915,37 @@ string CompilerMSL::func_type_decl(SPIRType &type) string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { const auto &type = get<SPIRType>(argument.basetype); + return get_type_address_space(type, argument.self, true); +} +string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument) +{ + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. + Bitset flags; + auto *var = maybe_get<SPIRVariable>(id); + if (var && type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + flags = get_buffer_block_flags(id); + else + flags = get_decoration_bitset(id); + + const char *addr_space = nullptr; switch (type.storage) { case StorageClassWorkgroup: - return "threadgroup"; + addr_space = "threadgroup"; + break; case StorageClassStorageBuffer: { // For arguments from variable pointers, we use the write count deduction, so // we should not assume any constness here. Only for global SSBOs. bool readonly = false; - if (has_decoration(type.self, DecorationBlock)) - readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); + if (!var || has_decoration(type.self, DecorationBlock)) + readonly = flags.get(DecorationNonWritable); - return readonly ? "const device" : "device"; + addr_space = readonly ? "const device" : "device"; + break; } case StorageClassUniform: @@ -5466,93 +7955,58 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { bool ssbo = has_decoration(type.self, DecorationBufferBlock); if (ssbo) - { - bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); - return readonly ? "const device" : "device"; - } + addr_space = flags.get(DecorationNonWritable) ? "const device" : "device"; else - return "constant"; + addr_space = "constant"; } + else if (!argument) + addr_space = "constant"; break; case StorageClassFunction: case StorageClassGeneric: - // No address space for plain values. - return type.pointer ? "thread" : ""; + break; case StorageClassInput: - if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id) - return "threadgroup"; + if (get_execution_model() == ExecutionModelTessellationControl && var && + var->basevariable == stage_in_ptr_var_id) + addr_space = "threadgroup"; break; case StorageClassOutput: if (capture_output_to_buffer) - return "device"; + addr_space = "device"; break; default: break; } - return "thread"; + if (!addr_space) + // No address space for plain values. + addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : ""; + + return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space); } -string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id) +const char *CompilerMSL::to_restrict(uint32_t id, bool space) { - switch (type.storage) - { - case StorageClassWorkgroup: - return "threadgroup"; - - case StorageClassStorageBuffer: - { - // This can be called for variable pointer contexts as well, so be very careful about which method we choose. - Bitset flags; - if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. + Bitset flags; + if (ir.ids[id].get_type() == TypeVariable) + { + uint32_t type_id = expression_type_id(id); + auto &type = expression_type(id); + if (type.basetype == SPIRType::Struct && + (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock))) flags = get_buffer_block_flags(id); else flags = get_decoration_bitset(id); - - return flags.get(DecorationNonWritable) ? "const device" : "device"; - } - - case StorageClassUniform: - case StorageClassUniformConstant: - case StorageClassPushConstant: - if (type.basetype == SPIRType::Struct) - { - bool ssbo = has_decoration(type.self, DecorationBufferBlock); - if (ssbo) - { - // This can be called for variable pointer contexts as well, so be very careful about which method we choose. - Bitset flags; - if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) - flags = get_buffer_block_flags(id); - else - flags = get_decoration_bitset(id); - - return flags.get(DecorationNonWritable) ? "const device" : "device"; - } - else - return "constant"; - } - break; - - case StorageClassFunction: - case StorageClassGeneric: - // No address space for plain values. - return type.pointer ? "thread" : ""; - - case StorageClassOutput: - if (capture_output_to_buffer) - return "device"; - break; - - default: - break; } + else + flags = get_decoration_bitset(id); - return "thread"; + return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : ""; } string CompilerMSL::entry_point_arg_stage_in() @@ -5581,8 +8035,9 @@ string CompilerMSL::entry_point_arg_stage_in() void CompilerMSL::entry_point_args_builtin(string &ep_args) { // Builtin variables + SmallVector<pair<SPIRVariable *, BuiltIn>, 8> active_builtins; ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) { - BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; + auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); // Don't emit SamplePosition as a separate parameter. In the entry // point, we get that by calling get_sample_position() on the sample ID. @@ -5590,20 +8045,66 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) get_variable_data_type(var).basetype != SPIRType::Struct && get_variable_data_type(var).basetype != SPIRType::ControlPointArray) { + // If the builtin is not part of the active input builtin set, don't emit it. + // Relevant for multiple entry-point modules which might declare unused builtins. + if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id)) + return; + + // Remember this variable. We may need to correct its type. + active_builtins.push_back(make_pair(&var, bi_type)); + + // These builtins are emitted specially. If we pass this branch, the builtin directly matches + // a MSL builtin. if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation && bi_type != BuiltInPatchVertices && bi_type != BuiltInTessLevelInner && bi_type != BuiltInTessLevelOuter && bi_type != BuiltInPosition && bi_type != BuiltInPointSize && - bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance) + bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask && + bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV && + bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask && + bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask && bi_type != BuiltInDeviceIndex && + ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) || + bi_type != BuiltInViewIndex) && + (get_execution_model() == ExecutionModelGLCompute || + (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) || + (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize))) { if (!ep_args.empty()) ep_args += ", "; - ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id); - ep_args += " [[" + builtin_qualifier(bi_type) + "]]"; + ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); + ep_args += " [[" + builtin_qualifier(bi_type); + if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage)) + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Post-depth coverage requires Metal 2.0."); + if (!msl_options.is_ios()) + SPIRV_CROSS_THROW("Post-depth coverage is only supported on iOS."); + ep_args += ", post_depth_coverage"; + } + ep_args += "]]"; } } + + if (var.storage == StorageClassInput && + has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase)) + { + // This is a special implicit builtin, not corresponding to any SPIR-V builtin, + // which holds the base that was passed to vkCmdDispatchBase(). If it's present, + // assume we emitted it for a good reason. + assert(msl_options.supports_msl_version(1, 2)); + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]"; + } }); + // Correct the types of all encountered active builtins. We couldn't do this before + // because ensure_correct_builtin_type() may increase the bound, which isn't allowed + // while iterating over IDs. + for (auto &var : active_builtins) + var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second); + // Vertex and instance index built-ins if (needs_vertex_idx_arg) ep_args += built_in_func_arg(BuiltInVertexIndex, !ep_args.empty()); @@ -5670,6 +8171,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) { string ep_args = entry_point_arg_stage_in(); + Bitset claimed_bindings; for (uint32_t i = 0; i < kMaxArgumentBuffers; i++) { @@ -5684,12 +8186,30 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id); - ep_args += " [[buffer(" + convert_to_string(i) + ")]]"; + // Check if the argument buffer binding itself has been remapped. + uint32_t buffer_binding; + auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding }); + if (itr != end(resource_bindings)) + { + buffer_binding = itr->second.first.msl_buffer; + itr->second.second = true; + } + else + { + // As a fallback, directly map desc set <-> binding. + // If that was taken, take the next buffer binding. + if (claimed_bindings.get(i)) + buffer_binding = next_metal_resource_index_buffer; + else + buffer_binding = i; + } + + claimed_bindings.set(buffer_binding); + + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id); + ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]"; - // Makes it more practical for testing, since the push constant block can occupy the first available - // buffer slot if it's not bound explicitly. - next_metal_resource_index_buffer = i + 1; + next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1); } entry_point_args_discrete_descriptors(ep_args); @@ -5701,6 +8221,28 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) return ep_args; } +const MSLConstexprSampler *CompilerMSL::find_constexpr_sampler(uint32_t id) const +{ + // Try by ID. + { + auto itr = constexpr_samplers_by_id.find(id); + if (itr != end(constexpr_samplers_by_id)) + return &itr->second; + } + + // Try by binding. + { + uint32_t desc_set = get_decoration(id, DecorationDescriptorSet); + uint32_t binding = get_decoration(id, DecorationBinding); + + auto itr = constexpr_samplers_by_binding.find({ desc_set, binding }); + if (itr != end(constexpr_samplers_by_binding)) + return &itr->second; + } + + return nullptr; +} + void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) { // Output resources, sorted by resource index & type @@ -5712,43 +8254,65 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; }; SmallVector<Resource> resources; - ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) { if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && !is_hidden_variable(var)) { auto &type = get_variable_data_type(var); - uint32_t var_id = var.self; - if (var.storage != StorageClassPushConstant) + // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS. + // But we won't know when the argument buffer is encoded whether this image will have + // a NonWritable decoration. So just use discrete arguments for all storage images + // on iOS. + if (!(msl_options.is_ios() && type.basetype == SPIRType::Image && type.image.sampled == 2) && + var.storage != StorageClassPushConstant) { uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); if (descriptor_set_is_argument_buffer(desc_set)) return; } + const MSLConstexprSampler *constexpr_sampler = nullptr; + if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler) + { + constexpr_sampler = find_constexpr_sampler(var_id); + if (constexpr_sampler) + { + // Mark this ID as a constexpr sampler for later in case it came from set/bindings. + constexpr_samplers_by_id[var_id] = *constexpr_sampler; + } + } + if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); - resources.push_back( - { &var, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) }); - if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0) + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; + + for (uint32_t i = 0; i < plane_count; i++) + resources.push_back({ &var, to_name(var_id), SPIRType::Image, + get_metal_resource_index(var, SPIRType::Image, i), i }); + + if (type.image.dim != DimBuffer && !constexpr_sampler) { resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler, - get_metal_resource_index(var, SPIRType::Sampler) }); + get_metal_resource_index(var, SPIRType::Sampler), 0 }); } } - else if (constexpr_samplers.count(var_id) == 0) + else if (!constexpr_sampler) { // constexpr samplers are not declared as resources. add_resource_name(var_id); resources.push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 }); } } }); @@ -5789,17 +8353,24 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) { if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" + - convert_to_string(i); - ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]"; + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) + + r.name + "_" + convert_to_string(i); + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } } else { if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + ep_args += + get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } break; } @@ -5813,10 +8384,25 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (!ep_args.empty()) ep_args += ", "; ep_args += image_type_glsl(type, var_id) + " " + r.name; - ep_args += " [[texture(" + convert_to_string(r.index) + ")]]"; + if (r.plane > 0) + ep_args += join(plane_name_suffix, r.plane); + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; default: - SPIRV_CROSS_THROW("Unexpected resource type"); + if (!ep_args.empty()) + ep_args += ", "; + if (!type.pointer) + ep_args += get_type_address_space(get<SPIRType>(var.basetype), var_id) + " " + + type_to_glsl(type, var_id) + "& " + r.name; + else + ep_args += type_to_glsl(type, var_id) + " " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; } } @@ -5838,28 +8424,62 @@ string CompilerMSL::entry_point_args_classic(bool append_comma) void CompilerMSL::fix_up_shader_inputs_outputs() { - // Look for sampled images. Add hooks to set up the swizzle constants. + // Look for sampled images and buffer. Add hooks to set up the swizzle constants or array lengths. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { auto &type = get_variable_data_type(var); - uint32_t var_id = var.self; + bool ssbo = has_decoration(type.self, DecorationBufferBlock); - if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || - var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && - !is_hidden_variable(var)) + if (var.storage == StorageClassUniformConstant && !is_hidden_variable(var)) { if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) { auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point); entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { - auto &aux_type = expression_type(aux_buffer_id); bool is_array_type = !type.array.empty(); - // If we have an array of images, we need to be able to index into it, so take a pointer instead. - statement("constant uint32_t", is_array_type ? "* " : "& ", to_swizzle_expression(var_id), - is_array_type ? " = &" : " = ", to_name(aux_buffer_id), ".", - to_member_name(aux_type, k_aux_mbr_idx_swizzle_const), "[", - convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + { + statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id), + is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]), + ".spvSwizzleConstants", "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + } + else + { + // If we have an array of images, we need to be able to index into it, so take a pointer instead. + statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id), + is_array_type ? " = &" : " = ", to_name(swizzle_buffer_id), "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + } + }); + } + } + else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) && + !is_hidden_variable(var)) + { + if (buffers_requiring_array_length.count(var.self)) + { + auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point); + entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { + bool is_array_type = !type.array.empty(); + + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + { + statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), + is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]), + ".spvBufferSizeConstants", "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + } + else + { + // If we have an array of images, we need to be able to index into it, so take a pointer instead. + statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), + is_array_type ? " = &" : " = ", to_name(buffer_size_buffer_id), "[", + convert_to_string(get_metal_resource_index(var, type.basetype)), "];"); + } }); } } @@ -5911,6 +8531,225 @@ void CompilerMSL::fix_up_shader_inputs_outputs() entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); }); } break; + case BuiltInSubgroupLocalInvocationId: + // This is natively supported in compute shaders. + if (get_execution_model() == ExecutionModelGLCompute) + break; + + // This is natively supported in fragment shaders in MSL 2.2. + if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + break; + + if (msl_options.is_ios()) + SPIRV_CROSS_THROW( + "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS."); + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW( + "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1."); + + // Shaders other than compute shaders don't support the SIMD-group + // builtins directly, but we can emulate them using the SIMD-group + // functions. This might break if some of the subgroup terminated + // before reaching the entry point. + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = simd_prefix_exclusive_sum(1);"); + }); + break; + case BuiltInSubgroupSize: + // This is natively supported in compute shaders. + if (get_execution_model() == ExecutionModelGLCompute) + break; + + // This is natively supported in fragment shaders in MSL 2.2. + if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + break; + + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS."); + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1."); + + entry_func.fixup_hooks_in.push_back( + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); }); + break; + case BuiltInSubgroupEqMask: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (", + to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + }); + break; + case BuiltInSubgroupGeMask: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + entry_func.fixup_hooks_in.push_back([=]() { + // Case where index < 32, size < 32: + // mask0 = bfe(0xFFFFFFFF, index, size - index); + // mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0 + // Case where index < 32 but size >= 32: + // mask0 = bfe(0xFFFFFFFF, index, 32 - index); + // mask1 = bfe(0xFFFFFFFF, 0, size - 32); + // Case where index >= 32: + // mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0 + // mask1 = bfe(0xFFFFFFFF, index - 32, size - index); + // This is expressed without branches to avoid divergent + // control flow--hence the complicated min/max expressions. + // This is further complicated by the fact that if you attempt + // to bfe out-of-bounds on Metal, undefined behavior is the + // result. + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + }); + break; + case BuiltInSubgroupGtMask: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + entry_func.fixup_hooks_in.push_back([=]() { + // The same logic applies here, except now the index is one + // more than the subgroup invocation ID. + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + }); + break; + case BuiltInSubgroupLeMask: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + }); + break; + case BuiltInSubgroupLtMask: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + }); + break; + case BuiltInViewIndex: + if (!msl_options.multiview) + { + // According to the Vulkan spec, when not running under a multiview + // render pass, ViewIndex is 0. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); + }); + } + else if (msl_options.view_index_from_device_index) + { + // In this case, we take the view index from that of the device we're running on. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + // We actually don't want to set the render_target_array_index here. + // Since every physical device is rendering a different view, + // there's no need for layered rendering here. + } + else if (get_execution_model() == ExecutionModelFragment) + { + // Because we adjusted the view index in the vertex shader, we have to + // adjust it back here. + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];"); + }); + } + else if (get_execution_model() == ExecutionModelVertex) + { + // Metal provides no special support for multiview, so we smuggle + // the view index in the instance index. + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id), + " % ", to_expression(view_mask_buffer_id), "[1];"); + statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id), + "[1];"); + }); + // In addition to setting the variable itself, we also need to + // set the render_target_array_index with it on output. We have to + // offset this by the base view index, because Metal isn't in on + // our little game here. + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ", + to_expression(view_mask_buffer_id), "[0];"); + }); + } + break; + case BuiltInDeviceIndex: + // Metal pipelines belong to the devices which create them, so we'll + // need to create a MTLPipelineState for every MTLDevice in a grouped + // VkDevice. We can assume, then, that the device index is constant. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + break; + case BuiltInWorkgroupId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId)) + break; + + // The vkCmdDispatchBase() command lets the client set the base value + // of WorkgroupId. Metal has no direct equivalent; we must make this + // adjustment ourselves. + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";"); + }); + break; + case BuiltInGlobalInvocationId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId)) + break; + + // GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize. + // This needs to be adjusted too. + entry_func.fixup_hooks_in.push_back([=]() { + auto &execution = this->get_entry_point(); + uint32_t workgroup_size_id = execution.workgroup_size.constant; + if (workgroup_size_id) + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * ", to_expression(workgroup_size_id), ";"); + else + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ", + execution.workgroup_size.z, ");"); + }); + break; default: break; } @@ -5919,65 +8758,96 @@ void CompilerMSL::fix_up_shader_inputs_outputs() } // Returns the Metal index of the resource of the specified type as used by the specified variable. -uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype) +uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane) { auto &execution = get_entry_point(); auto &var_dec = ir.meta[var.self].decoration; + auto &var_type = get<SPIRType>(var.basetype); uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set; uint32_t var_binding = (var.storage == StorageClassPushConstant) ? kPushConstBinding : var_dec.binding; - // If a matching binding has been specified, find and use it - auto itr = find_if(begin(resource_bindings), end(resource_bindings), - [&](const pair<MSLResourceBinding, bool> &resource) -> bool { - return var_desc_set == resource.first.desc_set && var_binding == resource.first.binding && - execution.model == resource.first.stage; - }); + // If a matching binding has been specified, find and use it. + auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding }); + + auto resource_decoration = var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler ? + SPIRVCrossDecorationResourceIndexSecondary : + SPIRVCrossDecorationResourceIndexPrimary; + if (plane == 1) + resource_decoration = SPIRVCrossDecorationResourceIndexTertiary; + if (plane == 2) + resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary; if (itr != end(resource_bindings)) { - itr->second = true; + auto &remap = itr->second; + remap.second = true; switch (basetype) { - case SPIRType::Struct: - return itr->first.msl_buffer; case SPIRType::Image: - return itr->first.msl_texture; + set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane); + return remap.first.msl_texture + plane; case SPIRType::Sampler: - return itr->first.msl_sampler; + set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler); + return remap.first.msl_sampler; default: - return 0; + set_extended_decoration(var.self, resource_decoration, remap.first.msl_buffer); + return remap.first.msl_buffer; } } - // If there is no explicit mapping of bindings to MSL, use the declared binding. - if (has_decoration(var.self, DecorationBinding)) - return get_decoration(var.self, DecorationBinding); + // If we have already allocated an index, keep using it. + if (has_extended_decoration(var.self, resource_decoration)) + return get_extended_decoration(var.self, resource_decoration); + + // If we did not explicitly remap, allocate bindings on demand. + // We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different. uint32_t binding_stride = 1; auto &type = get<SPIRType>(var.basetype); for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) - binding_stride *= type.array_size_literal[i] ? type.array[i] : get<SPIRConstant>(type.array[i]).scalar(); + binding_stride *= to_array_size_literal(type, i); + + assert(binding_stride != 0); - // If a binding has not been specified, revert to incrementing resource indices + // If a binding has not been specified, revert to incrementing resource indices. uint32_t resource_index; - switch (basetype) + + bool allocate_argument_buffer_ids = false; + uint32_t desc_set = 0; + + if (var.storage != StorageClassPushConstant) { - case SPIRType::Struct: - resource_index = next_metal_resource_index_buffer; - next_metal_resource_index_buffer += binding_stride; - break; - case SPIRType::Image: - resource_index = next_metal_resource_index_texture; - next_metal_resource_index_texture += binding_stride; - break; - case SPIRType::Sampler: - resource_index = next_metal_resource_index_sampler; - next_metal_resource_index_sampler += binding_stride; - break; - default: - resource_index = 0; - break; + desc_set = get_decoration(var.self, DecorationDescriptorSet); + allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(desc_set); + } + + if (allocate_argument_buffer_ids) + { + // Allocate from a flat ID binding space. + resource_index = next_metal_resource_ids[desc_set]; + next_metal_resource_ids[desc_set] += binding_stride; + } + else + { + // Allocate from plain bindings which are allocated per resource type. + switch (basetype) + { + case SPIRType::Image: + resource_index = next_metal_resource_index_texture; + next_metal_resource_index_texture += binding_stride; + break; + case SPIRType::Sampler: + resource_index = next_metal_resource_index_sampler; + next_metal_resource_index_sampler += binding_stride; + break; + default: + resource_index = next_metal_resource_index_buffer; + next_metal_resource_index_buffer += binding_stride; + break; + } } + + set_extended_decoration(var.self, resource_decoration, resource_index); return resource_index; } @@ -6008,13 +8878,28 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) if (constref) decl += "const "; + // If this is a combined image-sampler for a 2D image with floating-point type, + // we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter + // for a global, then we need to emit a "dynamic" combined image-sampler. + // Unfortunately, this is necessary to properly support passing around + // combined image-samplers with Y'CbCr conversions on them. + bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage && + type.image.dim == Dim2D && type_is_floating_point(get<SPIRType>(type.image.type)) && + spv_function_implementations.count(SPVFuncImplDynamicImageSampler); + bool builtin = is_builtin_variable(var); - if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id) + if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) decl += type_to_glsl(type, arg.id); else if (builtin) - decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn))); + decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)), arg.id); else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) decl += join(type_to_glsl(type, arg.id), "*"); + else if (is_dynamic_img_sampler) + { + decl += join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">"); + // Mark the variable so that we can handle passing it to another function. + set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + } else decl += type_to_glsl(type, arg.id); @@ -6037,6 +8922,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // non-constant arrays, but we can create thread const from constant. decl = string("thread const ") + decl; decl += " (&"; + const char *restrict_kw = to_restrict(name_id); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } decl += to_expression(name_id); decl += ")"; decl += type_to_array_glsl(type); @@ -6057,20 +8948,36 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) if (msl_options.argument_buffers) { - // An awkward case where we need to emit *more* address space declarations (yay!). - // An example is where we pass down an array of buffer pointers to leaf functions. - // It's a constant array containing pointers to constants. - // The pointer array is always constant however. E.g. - // device SSBO * constant (&array)[N]. - // const device SSBO * constant (&array)[N]. - // constant SSBO * constant (&array)[N]. - // However, this only matters for argument buffers, since for MSL 1.0 style codegen, - // we emit the buffer array on stack instead, and that seems to work just fine apparently. - if (storage == StorageClassUniform || storage == StorageClassStorageBuffer) - decl += " constant"; + uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet); + if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && + descriptor_set_is_argument_buffer(desc_set)) + { + // An awkward case where we need to emit *more* address space declarations (yay!). + // An example is where we pass down an array of buffer pointers to leaf functions. + // It's a constant array containing pointers to constants. + // The pointer array is always constant however. E.g. + // device SSBO * constant (&array)[N]. + // const device SSBO * constant (&array)[N]. + // constant SSBO * constant (&array)[N]. + // However, this only matters for argument buffers, since for MSL 1.0 style codegen, + // we emit the buffer array on stack instead, and that seems to work just fine apparently. + + // If the argument was marked as being in device address space, any pointer to member would + // be const device, not constant. + if (argument_buffer_device_storage_mask & (1u << desc_set)) + decl += " const device"; + else + decl += " constant"; + } } decl += " (&"; + const char *restrict_kw = to_restrict(name_id); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } decl += to_expression(name_id); decl += ")"; decl += type_to_array_glsl(type); @@ -6088,6 +8995,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } decl += "&"; decl += " "; + decl += to_restrict(name_id); decl += to_expression(name_id); } else @@ -6266,6 +9174,7 @@ void CompilerMSL::replace_illegal_names() "M_2_SQRTPI", "M_SQRT2", "M_SQRT1_2", + "quad_broadcast", }; static const unordered_set<string> illegal_func_names = { @@ -6465,6 +9374,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) // Pointer? if (type.pointer) { + const char *restrict_kw; type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id)); switch (type.basetype) { @@ -6476,6 +9386,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) default: // Anything else can be a raw pointer. type_name += "*"; + restrict_kw = to_restrict(id); + if (*restrict_kw) + { + type_name += " "; + type_name += restrict_kw; + } break; } return type_name; @@ -6527,10 +9443,14 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) type_name = "uint"; break; case SPIRType::Int64: - type_name = "long"; // Currently unsupported + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above."); + type_name = "long"; break; case SPIRType::UInt64: - type_name = "size_t"; + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above."); + type_name = "ulong"; break; case SPIRType::Half: type_name = "half"; @@ -6748,6 +9668,234 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) return img_type_name; } +void CompilerMSL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast<Op>(i.op); + + // Metal 2.0 is required. iOS only supports quad ops. macOS only supports + // broadcast and shuffle on 10.13 (2.0), with full support in 10.14 (2.1). + // Note that iOS makes no distinction between a quad-group and a subgroup; + // all subgroups are quad-groups there. + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up."); + + if (msl_options.is_ios()) + { + switch (op) + { + default: + SPIRV_CROSS_THROW("iOS only supports quad-group operations."); + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + break; + } + } + + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + { + switch (op) + { + default: + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.0 and up."); + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + break; + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar()); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "simd_is_first()", true); + break; + + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], + msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallot"); + break; + + case OpGroupNonUniformInverseBallot: + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_invocation_id_id, "spvSubgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotBitExtract: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotFindLSB: + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB"); + break; + + case OpGroupNonUniformBallotFindMSB: + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB"); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount"); + else if (operation == GroupOperationInclusiveScan) + emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, + "spvSubgroupBallotInclusiveBitCount"); + else if (operation == GroupOperationExclusiveScan) + emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, + "spvSubgroupBallotExclusiveBitCount"); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle"); + break; + + case OpGroupNonUniformShuffleXor: + emit_binary_func_op(result_type, id, ops[3], ops[4], + msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor"); + break; + + case OpGroupNonUniformShuffleUp: + emit_binary_func_op(result_type, id, ops[3], ops[4], + msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up"); + break; + + case OpGroupNonUniformShuffleDown: + emit_binary_func_op(result_type, id, ops[3], ops[4], + msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down"); + break; + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "simd_all"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "simd_any"); + break; + + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupAllEqual"); + break; + + // clang-format off +#define MSL_GROUP_OP(op, msl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast<GroupOperation>(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "simd_prefix_inclusive_" #msl_op); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "simd_prefix_exclusive_" #msl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + /* Only cluster sizes of 4 are supported. */ \ + uint32_t cluster_size = get<SPIRConstant>(ops[5]).scalar(); \ + if (cluster_size != 4) \ + SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ + emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ + } \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + MSL_GROUP_OP(FAdd, sum) + MSL_GROUP_OP(FMul, product) + MSL_GROUP_OP(IAdd, sum) + MSL_GROUP_OP(IMul, product) +#undef MSL_GROUP_OP + // The others, unfortunately, don't support InclusiveScan or ExclusiveScan. +#define MSL_GROUP_OP(op, msl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast<GroupOperation>(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationExclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + /* Only cluster sizes of 4 are supported. */ \ + uint32_t cluster_size = get<SPIRConstant>(ops[5]).scalar(); \ + if (cluster_size != 4) \ + SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ + emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ + } \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + MSL_GROUP_OP(FMin, min) + MSL_GROUP_OP(FMax, max) + MSL_GROUP_OP(SMin, min) + MSL_GROUP_OP(SMax, max) + MSL_GROUP_OP(UMin, min) + MSL_GROUP_OP(UMax, max) + MSL_GROUP_OP(BitwiseAnd, and) + MSL_GROUP_OP(BitwiseOr, or) + MSL_GROUP_OP(BitwiseXor, xor) + MSL_GROUP_OP(LogicalAnd, and) + MSL_GROUP_OP(LogicalOr, or) + MSL_GROUP_OP(LogicalXor, xor) + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + // We can implement this easily based on the following table giving + // the target lane ID from the direction and current lane ID: + // Direction + // | 0 | 1 | 2 | + // ---+---+---+---+ + // L 0 | 1 2 3 + // a 1 | 0 3 2 + // n 2 | 3 0 1 + // e 3 | 2 1 0 + // Notice that target = source ^ (direction + 1). + uint32_t mask = get<SPIRConstant>(ops[4]).scalar() + 1; + uint32_t mask_id = ir.increase_bound_by(1); + set<SPIRConstant>(mask_id, expression_type_id(ops[4]), mask, false); + emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor"); + break; + } + + case OpGroupNonUniformQuadBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast"); + break; + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { if (out_type.basetype == in_type.basetype) @@ -6807,6 +9955,7 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInCullDistance: case BuiltInLayer: case BuiltInFragDepth: + case BuiltInFragStencilRefEXT: case BuiltInSampleMask: if (get_execution_model() == ExecutionModelTessellationControl) break; @@ -6815,6 +9964,12 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) break; + case BuiltInBaryCoordNV: + case BuiltInBaryCoordNoPerspNV: + if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return stage_in_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); + break; + case BuiltInTessLevelOuter: if (get_execution_model() == ExecutionModelTessellationEvaluation) { @@ -6879,7 +10034,14 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case BuiltInPointSize: return "point_size"; case BuiltInPosition: - return "position"; + if (position_invariant) + { + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Invariant position is only supported on MSL 2.1 and up."); + return "position, invariant"; + } + else + return "position"; case BuiltInLayer: return "render_target_array_index"; case BuiltInViewportIndex: @@ -6900,6 +10062,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) return "threadgroup_position_in_grid"; case ExecutionModelTessellationEvaluation: return "patch_id"; + case ExecutionModelFragment: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("PrimitiveId is not supported in fragment on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2."); + return "primitive_id"; default: SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model."); } @@ -6928,6 +10096,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case BuiltInSamplePosition: // Shouldn't be reached. SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL."); + case BuiltInViewIndex: + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders."); + // The ViewIndex was implicitly used in the prior stages to set the render_target_array_index, + // so we can get it from there. + return "render_target_array_index"; // Fragment function out case BuiltInFragDepth: @@ -6938,6 +10112,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) else return "depth(any)"; + case BuiltInFragStencilRefEXT: + return "stencil"; + // Compute function in case BuiltInGlobalInvocationId: return "thread_position_in_grid"; @@ -6954,13 +10131,75 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case BuiltInLocalInvocationIndex: return "thread_index_in_threadgroup"; + case BuiltInSubgroupSize: + if (execution.model == ExecutionModelFragment) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders."); + return "threads_per_simdgroup"; + } + else + { + // thread_execution_width is an alias for threads_per_simdgroup, and it's only available since 1.0, + // but not in fragment. + return "thread_execution_width"; + } + + case BuiltInNumSubgroups: + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); + return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup"; + + case BuiltInSubgroupId: + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); + return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; + + case BuiltInSubgroupLocalInvocationId: + if (execution.model == ExecutionModelFragment) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders."); + return "thread_index_in_simdgroup"; + } + else + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); + return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup"; + } + + case BuiltInSubgroupEqMask: + case BuiltInSubgroupGeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupLtMask: + // Shouldn't be reached. + SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL."); + + case BuiltInBaryCoordNV: + // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + else if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); + return "barycentric_coord, center_perspective"; + + case BuiltInBaryCoordNoPerspNV: + // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + else if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); + return "barycentric_coord, center_no_perspective"; + default: return "unsupported-built-in"; } } // Returns an MSL string type declaration for a SPIR-V builtin -string CompilerMSL::builtin_type_decl(BuiltIn builtin) +string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) { const SPIREntryPoint &execution = get_entry_point(); switch (builtin) @@ -7030,11 +10269,24 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin) return "uint"; case BuiltInSamplePosition: return "float2"; + case BuiltInViewIndex: + return "uint"; + + case BuiltInHelperInvocation: + return "bool"; + + case BuiltInBaryCoordNV: + case BuiltInBaryCoordNoPerspNV: + // Use the type as declared, can be 1, 2 or 3 components. + return type_to_glsl(get_variable_data_type(get<SPIRVariable>(id))); // Fragment function out case BuiltInFragDepth: return "float"; + case BuiltInFragStencilRefEXT: + return "uint"; + // Compute function in case BuiltInGlobalInvocationId: case BuiltInLocalInvocationId: @@ -7042,10 +10294,20 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin) case BuiltInWorkgroupId: return "uint3"; case BuiltInLocalInvocationIndex: + case BuiltInNumSubgroups: + case BuiltInSubgroupId: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: return "uint"; + case BuiltInSubgroupEqMask: + case BuiltInSubgroupGeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupLtMask: + return "uint4"; - case BuiltInHelperInvocation: - return "bool"; + case BuiltInDeviceIndex: + return "int"; default: return "unsupported-built-in-type"; @@ -7066,11 +10328,101 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) return bi_arg; } -// Returns the byte size of a struct member. -size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const +{ + if (member_is_remapped_physical_type(type, index)) + return get<SPIRType>(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID)); + else + return get<SPIRType>(type.member_types[index]); +} + +uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ + // Array stride in MSL is always size * array_size. sizeof(float3) == 16, + // unlike GLSL and HLSL where array stride would be 16 and size 12. + + // We could use parent type here and recurse, but that makes creating physical type remappings + // far more complicated. We'd rather just create the final type, and ignore having to create the entire type + // hierarchy in order to compute this value, so make a temporary type on the stack. + + auto basic_type = type; + basic_type.array.clear(); + basic_type.array_size_literal.clear(); + uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major); + + uint32_t dimensions = uint32_t(type.array.size()); + assert(dimensions > 0); + dimensions--; + + // Multiply together every dimension, except the last one. + for (uint32_t dim = 0; dim < dimensions; dim++) + { + uint32_t array_size = to_array_size_literal(type, dim); + value_size *= max(array_size, 1u); + } + + return value_size; +} + +uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_array_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const +{ + // For packed matrices, we just use the size of the vector type. + // Otherwise, MatrixStride == alignment, which is the size of the underlying vector type. + if (packed) + return (type.width / 8) * (row_major ? type.columns : type.vecsize); + else + return get_declared_type_alignment_msl(type, false, row_major); +} + +uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const { - auto &type = get<SPIRType>(struct_type.member_types[index]); + return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment, + bool ignore_padding) const +{ + // If we have a target size, that is the declared size as well. + if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget)) + return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget); + + if (struct_type.member_types.empty()) + return 0; + + uint32_t mbr_cnt = uint32_t(struct_type.member_types.size()); + + // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. + uint32_t alignment = 1; + + if (!ignore_alignment) + { + for (uint32_t i = 0; i < mbr_cnt; i++) + { + uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i); + alignment = max(alignment, mbr_alignment); + } + } + + // Last member will always be matched to the final Offset decoration, but size of struct in MSL now depends + // on physical size in MSL, and the size of the struct itself is then aligned to struct alignment. + uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1); + uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1); + msl_size = (msl_size + alignment - 1) & ~(alignment - 1); + return msl_size; +} +// Returns the byte size of a struct member. +uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ switch (type.basetype) { case SPIRType::Unknown: @@ -7083,40 +10435,47 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, default: { - // For arrays, we can use ArrayStride to get an easy check. - // Runtime arrays will have zero size so force to min of one. if (!type.array.empty()) { uint32_t array_size = to_array_size_literal(type); - return type_struct_member_array_stride(struct_type, index) * max(array_size, 1u); + return get_declared_type_array_stride_msl(type, is_packed, row_major) * max(array_size, 1u); } if (type.basetype == SPIRType::Struct) + return get_declared_struct_size_msl(type); + + if (is_packed) { - // The size of a struct in Metal is aligned up to its natural alignment. - auto size = get_declared_struct_size(type); - auto alignment = get_declared_struct_member_alignment(struct_type, index); - return (size + alignment - 1) & ~(alignment - 1); + return type.vecsize * type.columns * (type.width / 8); } + else + { + // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; - uint32_t component_size = type.width / 8; - uint32_t vecsize = type.vecsize; - uint32_t columns = type.columns; + if (row_major) + swap(vecsize, columns); - // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. - if (vecsize == 3 && !has_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPacked)) - vecsize = 4; + if (vecsize == 3) + vecsize = 4; - return component_size * vecsize * columns; + return vecsize * columns * (type.width / 8); + } } } } -// Returns the byte alignment of a struct member. -size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const +uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const { - auto &type = get<SPIRType>(struct_type.member_types[index]); + return get_declared_type_size_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} +// Returns the byte alignment of a type. +uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ switch (type.basetype) { case SPIRType::Unknown: @@ -7127,12 +10486,19 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_ case SPIRType::Sampler: SPIRV_CROSS_THROW("Querying alignment of opaque object."); + case SPIRType::Int64: + SPIRV_CROSS_THROW("long types are not supported in buffers in MSL."); + case SPIRType::UInt64: + SPIRV_CROSS_THROW("ulong types are not supported in buffers in MSL."); + case SPIRType::Double: + SPIRV_CROSS_THROW("double types are not supported in buffers in MSL."); + case SPIRType::Struct: { // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. uint32_t alignment = 1; for (uint32_t i = 0; i < type.member_types.size(); i++) - alignment = max(alignment, uint32_t(get_declared_struct_member_alignment(type, i))); + alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i))); return alignment; } @@ -7141,25 +10507,28 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_ // Alignment of packed type is the same as the underlying component or column size. // Alignment of unpacked type is the same as the vector size. // Alignment of 3-elements vector is the same as 4-elements (including packed using column). - if (member_is_packed_type(struct_type, index)) - { - // This is getting pretty complicated. - // The special case of array of float/float2 needs to be handled here. - uint32_t packed_type_id = - get_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPackedType); - const SPIRType *packed_type = packed_type_id != 0 ? &get<SPIRType>(packed_type_id) : nullptr; - if (packed_type && is_array(*packed_type) && !is_matrix(*packed_type) && - packed_type->basetype != SPIRType::Struct) - return (packed_type->width / 8) * 4; - else - return (type.width / 8) * (type.columns == 3 ? 4 : type.columns); + if (is_packed) + { + // If we have packed_T and friends, the alignment is always scalar. + return type.width / 8; } else - return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize); + { + // This is the general rule for MSL. Size == alignment. + uint32_t vecsize = row_major ? type.columns : type.vecsize; + return (type.width / 8) * (vecsize == 3 ? 4 : vecsize); + } } } } +uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_alignment_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + bool CompilerMSL::skip_argument(uint32_t) const { return false; @@ -7207,7 +10576,7 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar case OpImageDrefGather: compiler.has_sampled_images = compiler.has_sampled_images || compiler.is_sampled_image_type(compiler.expression_type(args[2])); - compiler.needs_aux_buffer_def = compiler.needs_aux_buffer_def || compiler.has_sampled_images; + compiler.needs_swizzle_buffer_def = compiler.needs_swizzle_buffer_def || compiler.has_sampled_images; break; default: break; @@ -7215,6 +10584,17 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar return true; } +// If a needed custom function wasn't added before, add it and force a recompile. +void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func) +{ + if (spv_function_implementations.count(spv_func) == 0) + { + spv_function_implementations.insert(spv_func); + suppress_missing_prototypes = true; + force_recompile(); + } +} + bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length) { // Since MSL exists in a single execution scope, function prototype declarations are not @@ -7267,6 +10647,37 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui uses_atomics = true; break; + case OpGroupNonUniformInverseBallot: + needs_subgroup_invocation_id = true; + break; + + case OpGroupNonUniformBallotBitCount: + if (args[3] != GroupOperationReduce) + needs_subgroup_invocation_id = true; + break; + + case OpArrayLength: + { + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + compiler.buffers_requiring_array_length.insert(var->self); + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + // OpArrayLength might want to know if taking ArrayLength of an array of SSBOs. + uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set<SPIRExpression>(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + break; + } + default: break; } @@ -7361,32 +10772,6 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]]; if (tid && compiler.get<SPIRType>(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native) return SPVFuncImplTexelBufferCoords; - - if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - - break; - } - - case OpImageSampleExplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleDrefImplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageGather: - case OpImageDrefGather: - if (compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - break; - - case OpCompositeConstruct: - { - auto &type = compiler.get<SPIRType>(args[0]); - if (type.array.size() > 1) // We need to use copies to build the composite. - return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size() - 1); break; } @@ -7395,7 +10780,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o uint32_t extension_set = args[2]; if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL) { - GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]); + auto op_450 = static_cast<GLSLstd450>(args[3]); switch (op_450) { case GLSLstd450Radians: @@ -7410,6 +10795,27 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o return SPVFuncImplFindUMsb; case GLSLstd450SSign: return SPVFuncImplSSign; + case GLSLstd450Reflect: + { + auto &type = compiler.get<SPIRType>(args[0]); + if (type.vecsize == 1) + return SPVFuncImplReflectScalar; + break; + } + case GLSLstd450Refract: + { + auto &type = compiler.get<SPIRType>(args[0]); + if (type.vecsize == 1) + return SPVFuncImplRefractScalar; + break; + } + case GLSLstd450FaceForward: + { + auto &type = compiler.get<SPIRType>(args[0]); + if (type.vecsize == 1) + return SPVFuncImplFaceForwardScalar; + break; + } case GLSLstd450MatrixInverse: { auto &mat_type = compiler.get<SPIRType>(args[0]); @@ -7433,6 +10839,25 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o break; } + case OpGroupNonUniformBallot: + return SPVFuncImplSubgroupBallot; + + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + return SPVFuncImplSubgroupBallotBitExtract; + + case OpGroupNonUniformBallotFindLSB: + return SPVFuncImplSubgroupBallotFindLSB; + + case OpGroupNonUniformBallotFindMSB: + return SPVFuncImplSubgroupBallotFindMSB; + + case OpGroupNonUniformBallotBitCount: + return SPVFuncImplSubgroupBallotBitCount; + + case OpGroupNonUniformAllEqual: + return SPVFuncImplSubgroupAllEqual; + default: break; } @@ -7497,14 +10922,20 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) meta.members.resize(max(type.member_types.size(), meta.members.size())); } -void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler) +void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler) { auto &type = get<SPIRType>(get<SPIRVariable>(id).basetype); if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type."); if (!type.array.empty()) SPIRV_CROSS_THROW("Can not remap array of samplers."); - constexpr_samplers[id] = sampler; + constexpr_samplers_by_id[id] = sampler; +} + +void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, + const MSLConstexprSampler &sampler) +{ + constexpr_samplers_by_binding[{ desc_set, binding }] = sampler; } void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) @@ -7529,6 +10960,15 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp case BuiltInNumWorkgroups: case BuiltInLayer: case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + case BuiltInPrimitiveId: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + case BuiltInViewIndex: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + case BuiltInBaseInstance: + case BuiltInBaseVertex: expected_type = SPIRType::UInt; break; @@ -7569,6 +11009,9 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr { case BuiltInLayer: case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + case BuiltInPrimitiveId: + case BuiltInViewIndex: expected_type = SPIRType::UInt; break; @@ -7639,9 +11082,14 @@ void CompilerMSL::analyze_argument_buffers() string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; }; SmallVector<Resource> resources_in_set[kMaxArgumentBuffers]; + bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {}; + bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {}; + bool needs_buffer_sizes = false; + ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &var) { if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassStorageBuffer) && @@ -7658,36 +11106,116 @@ void CompilerMSL::analyze_argument_buffers() if (desc_set >= kMaxArgumentBuffers) SPIRV_CROSS_THROW("Descriptor set index is out of range."); + const MSLConstexprSampler *constexpr_sampler = nullptr; + if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler) + { + constexpr_sampler = find_constexpr_sampler(var_id); + if (constexpr_sampler) + { + // Mark this ID as a constexpr sampler for later in case it came from set/bindings. + constexpr_samplers_by_id[var_id] = *constexpr_sampler; + } + } + if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); - uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image); - uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); - - // Avoid trivial conflicts where we didn't remap. - // This will let us at least compile test cases without having to instrument remaps. - if (sampler_resource_index == image_resource_index) - sampler_resource_index += type.array.empty() ? 1 : to_array_size_literal(type); + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; - resources_in_set[desc_set].push_back({ &var, to_name(var_id), SPIRType::Image, image_resource_index }); + for (uint32_t i = 0; i < plane_count; i++) + { + uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::Image, image_resource_index, i }); + } - if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0) + if (type.image.dim != DimBuffer && !constexpr_sampler) { + uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); resources_in_set[desc_set].push_back( - { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index }); + { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 }); } } - else if (constexpr_samplers.count(var_id) == 0) + else if (!constexpr_sampler) { // constexpr samplers are not declared as resources. - add_resource_name(var_id); - resources_in_set[desc_set].push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + if (!msl_options.is_ios() || type.basetype != SPIRType::Image || type.image.sampled != 2) + { + add_resource_name(var_id); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 }); + } + } + + // Check if this descriptor set needs a swizzle buffer. + if (needs_swizzle_buffer_def && is_sampled_image_type(type)) + set_needs_swizzle_buffer[desc_set] = true; + else if (buffers_requiring_array_length.count(var_id) != 0) + { + set_needs_buffer_sizes[desc_set] = true; + needs_buffer_sizes = true; } } }); + if (needs_swizzle_buffer_def || needs_buffer_sizes) + { + uint32_t uint_ptr_type_id = 0; + + // We might have to add a swizzle buffer resource to the set. + for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) + { + if (!set_needs_swizzle_buffer[desc_set] && !set_needs_buffer_sizes[desc_set]) + continue; + + if (uint_ptr_type_id == 0) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_id = offset; + uint_ptr_type_id = offset + 1; + + // Create a buffer to hold extra data, including the swizzle constants. + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_pointer = uint_type; + uint_type_pointer.pointer = true; + uint_type_pointer.pointer_depth = 1; + uint_type_pointer.parent_type = type_id; + uint_type_pointer.storage = StorageClassUniform; + set<SPIRType>(uint_ptr_type_id, uint_type_pointer); + set_decoration(uint_ptr_type_id, DecorationArrayStride, 4); + } + + if (set_needs_swizzle_buffer[desc_set]) + { + uint32_t var_id = ir.increase_bound_by(1); + auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant); + set_name(var_id, "spvSwizzleConstants"); + set_decoration(var_id, DecorationDescriptorSet, desc_set); + set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); + } + + if (set_needs_buffer_sizes[desc_set]) + { + uint32_t var_id = ir.increase_bound_by(1); + auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant); + set_name(var_id, "spvBufferSizeConstants"); + set_decoration(var_id, DecorationDescriptorSet, desc_set); + set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); + } + } + } + for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) { auto &resources = resources_in_set[desc_set]; @@ -7702,8 +11230,20 @@ void CompilerMSL::analyze_argument_buffers() argument_buffer_ids[desc_set] = next_id; auto &buffer_type = set<SPIRType>(type_id); - buffer_type.storage = StorageClassUniform; + buffer_type.basetype = SPIRType::Struct; + + if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0) + { + buffer_type.storage = StorageClassStorageBuffer; + // Make sure the argument buffer gets marked as const device. + set_decoration(next_id, DecorationNonWritable); + // Need to mark the type as a Block to enable this. + set_decoration(type_id, DecorationBlock); + } + else + buffer_type.storage = StorageClassUniform; + set_name(type_id, join("spvDescriptorSetBuffer", desc_set)); auto &ptr_type = set<SPIRType>(ptr_type_id); @@ -7727,6 +11267,8 @@ void CompilerMSL::analyze_argument_buffers() auto &var = *resource.var; auto &type = get_variable_data_type(var); string mbr_name = ensure_valid_name(resource.name, "m"); + if (resource.plane > 0) + mbr_name += join(plane_name_suffix, resource.plane); set_member_name(buffer_type.self, member_index, mbr_name); if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler) @@ -7754,12 +11296,22 @@ void CompilerMSL::analyze_argument_buffers() } else { + uint32_t binding = get_decoration(var.self, DecorationBinding); + SetBindingPair pair = { desc_set, binding }; + if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || resource.basetype == SPIRType::SampledImage) { // Drop pointer information when we emit the resources into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); - set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + if (resource.plane == 0) + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + else if (buffers_requiring_dynamic_offset.count(pair)) + { + // Don't set the qualified name here; we'll define a variable holding the corrected buffer address later. + buffer_type.member_types.push_back(var.basetype); + buffers_requiring_dynamic_offset[pair].second = var.self; } else { @@ -7772,7 +11324,7 @@ void CompilerMSL::analyze_argument_buffers() } } - set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationArgumentBufferID, + set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationResourceIndexPrimary, resource.index); set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, var.self); @@ -7780,3 +11332,35 @@ void CompilerMSL::analyze_argument_buffers() } } } + +bool CompilerMSL::SetBindingPair::operator==(const SetBindingPair &other) const +{ + return desc_set == other.desc_set && binding == other.binding; +} + +bool CompilerMSL::SetBindingPair::operator<(const SetBindingPair &other) const +{ + return desc_set < other.desc_set || (desc_set == other.desc_set && binding < other.binding); +} + +bool CompilerMSL::StageSetBinding::operator==(const StageSetBinding &other) const +{ + return model == other.model && desc_set == other.desc_set && binding == other.binding; +} + +size_t CompilerMSL::InternalHasher::operator()(const SetBindingPair &value) const +{ + // Quality of hash doesn't really matter here. + auto hash_set = std::hash<uint32_t>()(value.desc_set); + auto hash_binding = std::hash<uint32_t>()(value.binding); + return (hash_set * 0x10001b31) ^ hash_binding; +} + +size_t CompilerMSL::InternalHasher::operator()(const StageSetBinding &value) const +{ + // Quality of hash doesn't really matter here. + auto hash_model = std::hash<uint32_t>()(value.model); + auto hash_set = std::hash<uint32_t>()(value.desc_set); + auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set; + return (tmp_hash * 0x10001b31) ^ value.binding; +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp index 8d3a8ad..d16b593 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp @@ -20,6 +20,7 @@ #include "spirv_glsl.hpp" #include <map> #include <set> +#include <stddef.h> #include <unordered_map> #include <unordered_set> @@ -53,9 +54,9 @@ struct MSLVertexAttr // Matches the binding index of a MSL resource for a binding within a descriptor set. // Taken together, the stage, desc_set and binding combine to form a reference to a resource // descriptor used in a particular shading stage. -// If using MSL 2.0 argument buffers, and the descriptor set is not marked as a discrete descriptor set, -// the binding reference we remap to will become an [[id(N)]] attribute within -// the "descriptor set" argument buffer structure. +// If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, +// and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we +// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. // For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a // [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. struct MSLResourceBinding @@ -121,6 +122,50 @@ enum MSLSamplerBorderColor MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff }; +enum MSLFormatResolution +{ + MSL_FORMAT_RESOLUTION_444 = 0, + MSL_FORMAT_RESOLUTION_422, + MSL_FORMAT_RESOLUTION_420, + MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +}; + +enum MSLChromaLocation +{ + MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + MSL_CHROMA_LOCATION_MIDPOINT, + MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +}; + +enum MSLComponentSwizzle +{ + MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + MSL_COMPONENT_SWIZZLE_ZERO, + MSL_COMPONENT_SWIZZLE_ONE, + MSL_COMPONENT_SWIZZLE_R, + MSL_COMPONENT_SWIZZLE_G, + MSL_COMPONENT_SWIZZLE_B, + MSL_COMPONENT_SWIZZLE_A, + MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrModelConversion +{ + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrRange +{ + MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +}; + struct MSLConstexprSampler { MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; @@ -136,13 +181,40 @@ struct MSLConstexprSampler float lod_clamp_max = 1000.0f; int max_anisotropy = 1; + // Sampler Y'CbCr conversion parameters + uint32_t planes = 0; + MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; + MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY + MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; + MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; + uint32_t bpc = 8; + bool compare_enable = false; bool lod_clamp_enable = false; bool anisotropy_enable = false; -}; + bool ycbcr_conversion_enable = false; -// Tracks the type ID and member index of a struct member -using MSLStructMemberKey = uint64_t; + MSLConstexprSampler() + { + for (uint32_t i = 0; i < 4; i++) + swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; + } + bool swizzle_is_identity() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && + swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); + } + bool swizzle_has_one_or_zero() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); + } +}; // Special constant used in a MSLResourceBinding desc_set // element to indicate the bindings for the push constants. @@ -152,11 +224,21 @@ static const uint32_t kPushConstDescSet = ~(0u); // element to indicate the bindings for the push constants. static const uint32_t kPushConstBinding = 0; -static const uint32_t kMaxArgumentBuffers = 8; +// Special constant used in a MSLResourceBinding binding +// element to indicate the buffer binding for swizzle buffers. +static const uint32_t kSwizzleBufferBinding = ~(1u); + +// Special constant used in a MSLResourceBinding binding +// element to indicate the buffer binding for buffer size buffers to support OpArrayLength. +static const uint32_t kBufferSizeBufferBinding = ~(2u); -// The current version of the aux buffer structure. It must be incremented any time a -// new field is added to the aux buffer. -#define SPIRV_CROSS_MSL_AUX_BUFFER_STRUCT_VERSION 1 +// Special constant used in a MSLResourceBinding binding +// element to indicate the buffer binding used for the argument buffer itself. +// This buffer binding should be kept as small as possible as all automatic bindings for buffers +// will start at max(kArgumentBufferBinding) + 1. +static const uint32_t kArgumentBufferBinding = ~(3u); + +static const uint32_t kMaxArgumentBuffers = 8; // Decompiles SPIR-V to Metal Shading Language class CompilerMSL : public CompilerGLSL @@ -174,17 +256,24 @@ public: Platform platform = macOS; uint32_t msl_version = make_msl_version(1, 2); uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers - uint32_t aux_buffer_index = 30; + uint32_t swizzle_buffer_index = 30; uint32_t indirect_params_buffer_index = 29; uint32_t shader_output_buffer_index = 28; uint32_t shader_patch_output_buffer_index = 27; uint32_t shader_tess_factor_buffer_index = 26; + uint32_t buffer_size_buffer_index = 25; + uint32_t view_mask_buffer_index = 24; + uint32_t dynamic_offsets_buffer_index = 23; uint32_t shader_input_wg_index = 0; + uint32_t device_index = 0; bool enable_point_size_builtin = true; bool disable_rasterization = false; bool capture_output_to_buffer = false; bool swizzle_texture_samples = false; bool tess_domain_origin_lower_left = false; + bool multiview = false; + bool view_index_from_device_index = false; + bool dispatch_base = false; // Enable use of MSL 2.0 indirect argument buffers. // MSL 2.0 must also be enabled. @@ -212,7 +301,7 @@ public: msl_version = make_msl_version(major, minor, patch); } - bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const { return msl_version >= make_msl_version(major, minor, patch); } @@ -243,31 +332,52 @@ public: } // Provide feedback to calling API to allow it to pass an auxiliary - // buffer if the shader needs it. - bool needs_aux_buffer() const + // swizzle buffer if the shader needs it. + bool needs_swizzle_buffer() const + { + return used_swizzle_buffer; + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing STORAGE_BUFFER buffer sizes to support OpArrayLength. + bool needs_buffer_size_buffer() const + { + return !buffers_requiring_array_length.empty(); + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing the view mask for the current multiview subpass. + bool needs_view_mask_buffer() const { - return used_aux_buffer; + return msl_options.multiview && !msl_options.view_index_from_device_index; + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing the dispatch base workgroup ID. + bool needs_dispatch_base_buffer() const + { + return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2); } // Provide feedback to calling API to allow it to pass an output // buffer if the shader needs it. bool needs_output_buffer() const { - return capture_output_to_buffer && stage_out_var_id != 0; + return capture_output_to_buffer && stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass a patch output // buffer if the shader needs it. bool needs_patch_output_buffer() const { - return capture_output_to_buffer && patch_stage_out_var_id != 0; + return capture_output_to_buffer && patch_stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass an input threadgroup // buffer if the shader needs it. bool needs_input_threadgroup_mem() const { - return capture_output_to_buffer && stage_in_var_id != 0; + return capture_output_to_buffer && stage_in_var_id != ID(0); } explicit CompilerMSL(std::vector<uint32_t> spirv); @@ -288,14 +398,52 @@ public: // the set/binding combination was used by the MSL code. void add_msl_resource_binding(const MSLResourceBinding &resource); + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. index is the index within the dynamic offset buffer to use. This + // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC + // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers + // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with + // an offset taken from the dynamic offset buffer. + void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); + // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. // This corresponds to VK_KHR_push_descriptor in Vulkan. void add_discrete_descriptor_set(uint32_t desc_set); + // If an argument buffer is large enough, it may need to be in the device storage space rather than + // constant. Opt-in to this behavior here on a per set basis. + void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); + // Query after compilation is done. This allows you to check if a location or set/binding combination was used by the shader. bool is_msl_vertex_attribute_used(uint32_t location); + + // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. + // Constexpr samplers are always assumed to be emitted. + // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped + // by remap_constexpr_sampler(_by_binding). bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding); + // This must only be called after a successful call to CompilerMSL::compile(). + // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. + // If the descriptor set was part of an argument buffer, report the [[id(N)]], + // or [[buffer/texture/sampler]] binding for other resources. + // If the resource was a combined image sampler, report the image binding here, + // use the _secondary version of this call to query the sampler half of the resource. + // If no binding exists, uint32_t(-1) is returned. + uint32_t get_automatic_msl_resource_binding(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the + // sampler's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, + // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, + // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; + // Compiles the SPIR-V code into Metal Shading Language. std::string compile() override; @@ -305,7 +453,12 @@ public: // The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler. // This can be used on both combined image/samplers (sampler2D) or standalone samplers. // The remapped sampler must not be an array of samplers. - void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler); + // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. + void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); + + // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. + // Remaps based on ID take priority over set/binding remaps. + void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler); // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect // to use for a particular location. The default is 4 if number of components is not overridden. @@ -337,13 +490,44 @@ protected: SPVFuncImplInverse4x4, SPVFuncImplInverse3x3, SPVFuncImplInverse2x2, - SPVFuncImplRowMajor2x3, - SPVFuncImplRowMajor2x4, - SPVFuncImplRowMajor3x2, - SPVFuncImplRowMajor3x4, - SPVFuncImplRowMajor4x2, - SPVFuncImplRowMajor4x3, + // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's + // emitted before them. + SPVFuncImplForwardArgs, + // Likewise, this must come before *Swizzle. + SPVFuncImplGetSwizzle, SPVFuncImplTextureSwizzle, + SPVFuncImplGatherSwizzle, + SPVFuncImplGatherCompareSwizzle, + SPVFuncImplSubgroupBallot, + SPVFuncImplSubgroupBallotBitExtract, + SPVFuncImplSubgroupBallotFindLSB, + SPVFuncImplSubgroupBallotFindMSB, + SPVFuncImplSubgroupBallotBitCount, + SPVFuncImplSubgroupAllEqual, + SPVFuncImplReflectScalar, + SPVFuncImplRefractScalar, + SPVFuncImplFaceForwardScalar, + SPVFuncImplChromaReconstructNearest2Plane, + SPVFuncImplChromaReconstructNearest3Plane, + SPVFuncImplChromaReconstructLinear422CositedEven2Plane, + SPVFuncImplChromaReconstructLinear422CositedEven3Plane, + SPVFuncImplChromaReconstructLinear422Midpoint2Plane, + SPVFuncImplChromaReconstructLinear422Midpoint3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, + SPVFuncImplExpandITUFullRange, + SPVFuncImplExpandITUNarrowRange, + SPVFuncImplConvertYCbCrBT709, + SPVFuncImplConvertYCbCrBT601, + SPVFuncImplConvertYCbCrBT2020, + SPVFuncImplDynamicImageSampler, + SPVFuncImplArrayCopyMultidimMax = 6 }; @@ -351,30 +535,37 @@ protected: void emit_instruction(const Instruction &instr) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count) override; + void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count) override; void emit_header() override; void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; + void emit_subgroup_op(const Instruction &i) override; + std::string to_texture_op(const Instruction &i, bool *forward, + SmallVector<uint32_t> &inherited_expressions) override; void emit_fixup() override; std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = ""); void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0) override; + void emit_struct_padding_target(const SPIRType &type) override; std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; std::string sampler_type(const SPIRType &type); std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; - size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override; - std::string to_func_call_arg(uint32_t id) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_name(uint32_t id, bool allow_alias = true) const override; - std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, - uint32_t lod) override; - std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, + std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, + bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod, + uint32_t minlod) override; + std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, - uint32_t comp, uint32_t sample, bool *p_forward) override; + uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override; std::string to_initializer_expression(const SPIRVariable &var) override; - std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id) override; + std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool is_packed, bool row_major) override; + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; bool skip_argument(uint32_t id) const override; std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; @@ -385,7 +576,8 @@ protected: bool is_patch_block(const SPIRType &type); bool is_non_native_row_major_matrix(uint32_t id) override; bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; - std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override; + std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed) override; void preprocess_op_codes(); void localize_global_variables(); @@ -426,7 +618,7 @@ protected: void emit_specialization_constants_and_structs(); void emit_interface_block(uint32_t ib_var_id); bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); - void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows); + void fix_up_shader_inputs_outputs(); std::string func_type_decl(SPIRType &type); @@ -439,21 +631,43 @@ protected: std::string ensure_valid_name(std::string name, std::string pfx); std::string to_sampler_expression(uint32_t id); std::string to_swizzle_expression(uint32_t id); + std::string to_buffer_size_expression(uint32_t id); std::string builtin_qualifier(spv::BuiltIn builtin); - std::string builtin_type_decl(spv::BuiltIn builtin); + std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); std::string argument_decl(const SPIRFunction::Parameter &arg); std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); - uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype); + uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); - size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const; + + // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. + // These values can change depending on various extended decorations which control packing rules. + // We need to make these rules match up with SPIR-V declared rules. + uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const; + + uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + + const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; + + uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, + bool ignore_padding = false) const; + std::string to_component_argument(uint32_t id); - void align_struct(SPIRType &ib_type); - bool is_member_packable(SPIRType &ib_type, uint32_t index); - MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index); + void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs); + void mark_scalar_layout_structs(const SPIRType &ib_type); + void mark_struct_members_packed(const SPIRType &type); + void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index); + bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; std::string get_argument_address_space(const SPIRVariable &argument); - std::string get_type_address_space(const SPIRType &type, uint32_t id); + std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); + const char *to_restrict(uint32_t id, bool space = true); SPIRType &get_stage_in_struct_type(); SPIRType &get_stage_out_struct_type(); SPIRType &get_patch_stage_in_struct_type(); @@ -466,8 +680,10 @@ protected: void add_pragma_line(const std::string &line); void add_typedef_line(const std::string &line); void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); - void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override; + void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, + spv::StorageClass rhs_storage) override; void build_implicit_builtins(); + uint32_t build_constant_uint_array_pointer(); void emit_entry_point_declarations() override; uint32_t builtin_frag_coord_id = 0; uint32_t builtin_sample_id_id = 0; @@ -475,9 +691,17 @@ protected: uint32_t builtin_base_vertex_id = 0; uint32_t builtin_instance_idx_id = 0; uint32_t builtin_base_instance_id = 0; + uint32_t builtin_view_idx_id = 0; + uint32_t builtin_layer_id = 0; uint32_t builtin_invocation_id_id = 0; uint32_t builtin_primitive_id_id = 0; - uint32_t aux_buffer_id = 0; + uint32_t builtin_subgroup_invocation_id_id = 0; + uint32_t builtin_subgroup_size_id = 0; + uint32_t builtin_dispatch_base_id = 0; + uint32_t swizzle_buffer_id = 0; + uint32_t buffer_size_buffer_id = 0; + uint32_t view_mask_buffer_id = 0; + uint32_t dynamic_offsets_buffer_id = 0; void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; @@ -488,36 +712,65 @@ protected: bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); + void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); + + std::string convert_to_f32(const std::string &expr, uint32_t components); + Options msl_options; std::set<SPVFuncImpl> spv_function_implementations; std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location; std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_builtin; std::unordered_set<uint32_t> vtx_attrs_in_use; std::unordered_map<uint32_t, uint32_t> fragment_output_components; - std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding; std::set<std::string> pragma_lines; std::set<std::string> typedef_lines; SmallVector<uint32_t> vars_needing_early_declaration; - SmallVector<std::pair<MSLResourceBinding, bool>> resource_bindings; + struct SetBindingPair + { + uint32_t desc_set; + uint32_t binding; + bool operator==(const SetBindingPair &other) const; + bool operator<(const SetBindingPair &other) const; + }; + + struct StageSetBinding + { + spv::ExecutionModel model; + uint32_t desc_set; + uint32_t binding; + bool operator==(const StageSetBinding &other) const; + }; + + struct InternalHasher + { + size_t operator()(const SetBindingPair &value) const; + size_t operator()(const StageSetBinding &value) const; + }; + + std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings; + uint32_t next_metal_resource_index_buffer = 0; uint32_t next_metal_resource_index_texture = 0; uint32_t next_metal_resource_index_sampler = 0; - - uint32_t stage_in_var_id = 0; - uint32_t stage_out_var_id = 0; - uint32_t patch_stage_in_var_id = 0; - uint32_t patch_stage_out_var_id = 0; - uint32_t stage_in_ptr_var_id = 0; - uint32_t stage_out_ptr_var_id = 0; + // Intentionally uninitialized, works around MSVC 2013 bug. + uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; + + VariableID stage_in_var_id = 0; + VariableID stage_out_var_id = 0; + VariableID patch_stage_in_var_id = 0; + VariableID patch_stage_out_var_id = 0; + VariableID stage_in_ptr_var_id = 0; + VariableID stage_out_ptr_var_id = 0; bool has_sampled_images = false; bool needs_vertex_idx_arg = false; bool needs_instance_idx_arg = false; bool is_rasterization_disabled = false; bool capture_output_to_buffer = false; - bool needs_aux_buffer_def = false; - bool used_aux_buffer = false; + bool needs_swizzle_buffer_def = false; + bool used_swizzle_buffer = false; bool added_builtin_tess_level = false; + bool needs_subgroup_invocation_id = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; @@ -525,17 +778,29 @@ protected: std::string patch_stage_out_var_name = "patchOut"; std::string sampler_name_suffix = "Smplr"; std::string swizzle_name_suffix = "Swzl"; + std::string buffer_size_name_suffix = "BufferSize"; + std::string plane_name_suffix = "Plane"; std::string input_wg_var_name = "gl_in"; std::string output_buffer_var_name = "spvOut"; std::string patch_output_buffer_var_name = "spvPatchOut"; std::string tess_factor_buffer_var_name = "spvTessLevel"; spv::Op previous_instruction_opcode = spv::OpNop; - std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers; + // Must be ordered since declaration is in a specific order. + std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id; + std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding; + const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; + + std::unordered_set<uint32_t> buffers_requiring_array_length; SmallVector<uint32_t> buffer_arrays; + // Must be ordered since array is in a specific order. + std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset; + uint32_t argument_buffer_ids[kMaxArgumentBuffers]; uint32_t argument_buffer_discrete_mask = 0; + uint32_t argument_buffer_device_storage_mask = 0; + void analyze_argument_buffers(); bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; @@ -544,6 +809,8 @@ protected: bool suppress_missing_prototypes = false; + void add_spv_func_and_recompile(SPVFuncImpl spv_func); + // OpcodeHandler that handles several MSL preprocessing operations. struct OpCodePreprocessor : OpcodeHandler { @@ -561,6 +828,7 @@ protected: bool suppress_missing_prototypes = false; bool uses_atomics = false; bool uses_resource_write = false; + bool needs_subgroup_invocation_id = false; }; // OpcodeHandler that scans for uses of sampled images diff --git a/src/3rdparty/SPIRV-Cross/spirv_parser.cpp b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp index 1c0a830..08dcff9 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_parser.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp @@ -60,6 +60,7 @@ static bool is_valid_spirv_version(uint32_t version) case 0x10200: // SPIR-V 1.2 case 0x10300: // SPIR-V 1.3 case 0x10400: // SPIR-V 1.4 + case 0x10500: // SPIR-V 1.5 return true; default: @@ -162,12 +163,15 @@ void Parser::parse(const Instruction &instruction) case OpSourceContinued: case OpSourceExtension: case OpNop: - case OpLine: - case OpNoLine: - case OpString: case OpModuleProcessed: break; + case OpString: + { + set<SPIRString>(ops[0], extract_string(ir.spirv, instruction.offset + 1)); + break; + } + case OpMemoryModel: ir.addressing_model = static_cast<AddressingModel>(ops[0]); ir.memory_model = static_cast<MemoryModel>(ops[1]); @@ -240,6 +244,8 @@ void Parser::parse(const Instruction &instruction) auto ext = extract_string(ir.spirv, instruction.offset + 1); if (ext == "GLSL.std.450") set<SPIRExtension>(id, SPIRExtension::GLSL); + else if (ext == "DebugInfo") + set<SPIRExtension>(id, SPIRExtension::SPV_debug_info); else if (ext == "SPV_AMD_shader_ballot") set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot); else if (ext == "SPV_AMD_shader_explicit_vertex_parameter") @@ -256,6 +262,14 @@ void Parser::parse(const Instruction &instruction) break; } + case OpExtInst: + { + // The SPIR-V debug information extended instructions might come at global scope. + if (current_block) + current_block->ops.push_back(instruction); + break; + } + case OpEntryPoint: { auto itr = @@ -265,7 +279,9 @@ void Parser::parse(const Instruction &instruction) // Strings need nul-terminator and consume the whole word. uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2); - e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length); + + for (uint32_t i = strlen_words + 2; i < instruction.length; i++) + e.interface_variables.push_back(ops[i]); // Set the name of the entry point in case OpName is not provided later. ir.set_name(ops[1], e.name); @@ -556,10 +572,6 @@ void Parser::parse(const Instruction &instruction) type.image.sampled = ops[6]; type.image.format = static_cast<ImageFormat>(ops[7]); type.image.access = (length >= 9) ? static_cast<AccessQualifier>(ops[8]) : AccessQualifierMax; - - if (type.image.sampled == 0) - SPIRV_CROSS_THROW("OpTypeImage Sampled parameter must not be zero."); - break; } @@ -649,7 +661,7 @@ void Parser::parse(const Instruction &instruction) } } - if (type.type_alias == 0) + if (type.type_alias == TypeID(0)) global_struct_cache.push_back(id); } break; @@ -999,12 +1011,12 @@ void Parser::parse(const Instruction &instruction) ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT; ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT; - ir.continue_block_to_loop_header[current_block->continue_block] = current_block->self; + ir.continue_block_to_loop_header[current_block->continue_block] = BlockID(current_block->self); // Don't add loop headers to continue blocks, // which would make it impossible branch into the loop header since // they are treated as continues. - if (current_block->continue_block != current_block->self) + if (current_block->continue_block != BlockID(current_block->self)) ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT; if (length >= 3) @@ -1030,6 +1042,37 @@ void Parser::parse(const Instruction &instruction) break; } + case OpLine: + { + // OpLine might come at global scope, but we don't care about those since they will not be declared in any + // meaningful correct order. + // Ignore all OpLine directives which live outside a function. + if (current_block) + current_block->ops.push_back(instruction); + + // Line directives may arrive before first OpLabel. + // Treat this as the line of the function declaration, + // so warnings for arguments can propagate properly. + if (current_function) + { + // Store the first one we find and emit it before creating the function prototype. + if (current_function->entry_line.file_id == 0) + { + current_function->entry_line.file_id = ops[0]; + current_function->entry_line.line_literal = ops[1]; + } + } + break; + } + + case OpNoLine: + { + // OpNoLine might come at global scope. + if (current_block) + current_block->ops.push_back(instruction); + break; + } + // Actual opcodes. default: { diff --git a/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp index 0b2c585..1e8f615 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp @@ -61,6 +61,7 @@ public: void end_json_array(); void emit_json_array_value(const std::string &value); void emit_json_array_value(uint32_t value); + void emit_json_array_value(bool value); std::string str() const { @@ -158,6 +159,16 @@ void Stream::emit_json_array_value(uint32_t value) stack.top().second = true; } +void Stream::emit_json_array_value(bool value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + statement_inner(",\n"); + statement_no_return(value ? "true" : "false"); + stack.top().second = true; +} + void Stream::begin_json_object() { if (!stack.empty() && stack.top().second) @@ -256,6 +267,8 @@ string CompilerReflection::compile() json_stream = std::make_shared<simple_json::Stream>(); json_stream->set_current_locale_radix_character(current_locale_radix_character); json_stream->begin_json_object(); + fixup_type_alias(); + reorder_type_alias(); emit_entry_points(); emit_types(); emit_resources(); @@ -283,7 +296,7 @@ void CompilerReflection::emit_type(const SPIRType &type, bool &emitted_open_tag) { auto name = type_to_glsl(type); - if (type.type_alias != 0) + if (type.type_alias != TypeID(0)) return; if (!emitted_open_tag) @@ -422,6 +435,28 @@ void CompilerReflection::emit_entry_points() json_stream->begin_json_object(); json_stream->emit_json_key_value("name", e.name); json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model)); + if (e.execution_model == ExecutionModelGLCompute) + { + const auto &spv_entry = get_entry_point(e.name, e.execution_model); + + SpecializationConstant spec_x, spec_y, spec_z; + get_work_group_size_specialization_constants(spec_x, spec_y, spec_z); + + json_stream->emit_json_key_array("workgroup_size"); + json_stream->emit_json_array_value(spec_x.id != ID(0) ? spec_x.constant_id : + spv_entry.workgroup_size.x); + json_stream->emit_json_array_value(spec_y.id != ID(0) ? spec_y.constant_id : + spv_entry.workgroup_size.y); + json_stream->emit_json_array_value(spec_z.id != ID(0) ? spec_z.constant_id : + spv_entry.workgroup_size.z); + json_stream->end_json_array(); + + json_stream->emit_json_key_array("workgroup_size_is_spec_constant_id"); + json_stream->emit_json_array_value(spec_x.id != ID(0)); + json_stream->emit_json_array_value(spec_y.id != ID(0)); + json_stream->emit_json_array_value(spec_z.id != ID(0)); + json_stream->end_json_array(); + } json_stream->end_json_object(); } json_stream->end_json_array(); @@ -466,7 +501,7 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resou bool is_block = get_decoration_bitset(type.self).get(DecorationBlock) || get_decoration_bitset(type.self).get(DecorationBufferBlock); - uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id; + ID fallback_id = !is_push_constant && is_block ? ID(res.base_type_id) : ID(res.id); json_stream->begin_json_object(); |
