Update SPIRV-Cross

Change-Id: I15dbf83057b5aa435b87b80e219b113987735cad
Reviewed-by: Christian Strømme <christian.stromme@qt.io>
author: Laszlo Agocs <laszlo.agocs@qt.io> 2019-10-21 14:09:05 +0200
committer: Laszlo Agocs <laszlo.agocs@qt.io> 2019-10-23 10:29:49 +0000
commit: 53fc739e3d530a70e5371a08d001bacabc0233de (patch)
tree: 836910be948b6d7702b6944fcf0b7947f576d4b4
parent: 3ed14d7b0d539f97f2d68c83cc02d6509b24aea7 (diff)
22 files changed, 10145 insertions, 2027 deletions
diff --git a/src/3rdparty/SPIRV-Cross/qt_attribution.json b/src/3rdparty/SPIRV-Cross/qt_attribution.json
index 70df546..43847a0 100644
--- a/src/3rdparty/SPIRV-Cross/qt_attribution.json
+++ b/src/3rdparty/SPIRV-Cross/qt_attribution.json
@@ -7,7 +7,7 @@
         "QtUsage": "Shader code generation.",
 
         "Homepage": "https://github.com/KhronosGroup/SPIRV-Cross",
-        "Version": "f647e655d489a7699305ada30cda808a7dac079f",
+        "Version": "ff1897ae0e1fc1e37c604933694477f335ca8e44",
         "License": "Apache License 2.0",
         "LicenseId": "Apache-2.0",
         "LicenseFile": "LICENSE",
diff --git a/src/3rdparty/SPIRV-Cross/spirv.h b/src/3rdparty/SPIRV-Cross/spirv.h
index 8da27dd..1b67617 100644
--- a/src/3rdparty/SPIRV-Cross/spirv.h
+++ b/src/3rdparty/SPIRV-Cross/spirv.h
@@ -53,12 +53,12 @@
 
 typedef unsigned int SpvId;
 
-#define SPV_VERSION 0x10300
-#define SPV_REVISION 6
+#define SPV_VERSION 0x10500
+#define SPV_REVISION 1
 
 static const unsigned int SpvMagicNumber = 0x07230203;
-static const unsigned int SpvVersion = 0x00010300;
-static const unsigned int SpvRevision = 6;
+static const unsigned int SpvVersion = 0x00010500;
+static const unsigned int SpvRevision = 1;
 static const unsigned int SpvOpCodeMask = 0xffff;
 static const unsigned int SpvWordCountShift = 16;
 
@@ -95,6 +95,7 @@ typedef enum SpvAddressingModel_ {
     SpvAddressingModelLogical = 0,
     SpvAddressingModelPhysical32 = 1,
     SpvAddressingModelPhysical64 = 2,
+    SpvAddressingModelPhysicalStorageBuffer64 = 5348,
     SpvAddressingModelPhysicalStorageBuffer64EXT = 5348,
     SpvAddressingModelMax = 0x7fffffff,
 } SpvAddressingModel;
@@ -103,6 +104,7 @@ typedef enum SpvMemoryModel_ {
     SpvMemoryModelSimple = 0,
     SpvMemoryModelGLSL450 = 1,
     SpvMemoryModelOpenCL = 2,
+    SpvMemoryModelVulkan = 3,
     SpvMemoryModelVulkanKHR = 3,
     SpvMemoryModelMax = 0x7fffffff,
 } SpvMemoryModel;
@@ -158,6 +160,12 @@ typedef enum SpvExecutionMode_ {
     SpvExecutionModeDerivativeGroupQuadsNV = 5289,
     SpvExecutionModeDerivativeGroupLinearNV = 5290,
     SpvExecutionModeOutputTrianglesNV = 5298,
+    SpvExecutionModePixelInterlockOrderedEXT = 5366,
+    SpvExecutionModePixelInterlockUnorderedEXT = 5367,
+    SpvExecutionModeSampleInterlockOrderedEXT = 5368,
+    SpvExecutionModeSampleInterlockUnorderedEXT = 5369,
+    SpvExecutionModeShadingRateInterlockOrderedEXT = 5370,
+    SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371,
     SpvExecutionModeMax = 0x7fffffff,
 } SpvExecutionMode;
 
@@ -181,6 +189,7 @@ typedef enum SpvStorageClass_ {
     SpvStorageClassHitAttributeNV = 5339,
     SpvStorageClassIncomingRayPayloadNV = 5342,
     SpvStorageClassShaderRecordBufferNV = 5343,
+    SpvStorageClassPhysicalStorageBuffer = 5349,
     SpvStorageClassPhysicalStorageBufferEXT = 5349,
     SpvStorageClassMax = 0x7fffffff,
 } SpvStorageClass;
@@ -309,10 +318,16 @@ typedef enum SpvImageOperandsShift_ {
     SpvImageOperandsConstOffsetsShift = 5,
     SpvImageOperandsSampleShift = 6,
     SpvImageOperandsMinLodShift = 7,
+    SpvImageOperandsMakeTexelAvailableShift = 8,
     SpvImageOperandsMakeTexelAvailableKHRShift = 8,
+    SpvImageOperandsMakeTexelVisibleShift = 9,
     SpvImageOperandsMakeTexelVisibleKHRShift = 9,
+    SpvImageOperandsNonPrivateTexelShift = 10,
     SpvImageOperandsNonPrivateTexelKHRShift = 10,
+    SpvImageOperandsVolatileTexelShift = 11,
     SpvImageOperandsVolatileTexelKHRShift = 11,
+    SpvImageOperandsSignExtendShift = 12,
+    SpvImageOperandsZeroExtendShift = 13,
     SpvImageOperandsMax = 0x7fffffff,
 } SpvImageOperandsShift;
 
@@ -326,10 +341,16 @@ typedef enum SpvImageOperandsMask_ {
     SpvImageOperandsConstOffsetsMask = 0x00000020,
     SpvImageOperandsSampleMask = 0x00000040,
     SpvImageOperandsMinLodMask = 0x00000080,
+    SpvImageOperandsMakeTexelAvailableMask = 0x00000100,
     SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100,
+    SpvImageOperandsMakeTexelVisibleMask = 0x00000200,
     SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200,
+    SpvImageOperandsNonPrivateTexelMask = 0x00000400,
     SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400,
+    SpvImageOperandsVolatileTexelMask = 0x00000800,
     SpvImageOperandsVolatileTexelKHRMask = 0x00000800,
+    SpvImageOperandsSignExtendMask = 0x00001000,
+    SpvImageOperandsZeroExtendMask = 0x00002000,
 } SpvImageOperandsMask;
 
 typedef enum SpvFPFastMathModeShift_ {
@@ -410,6 +431,7 @@ typedef enum SpvDecoration_ {
     SpvDecorationNonWritable = 24,
     SpvDecorationNonReadable = 25,
     SpvDecorationUniform = 26,
+    SpvDecorationUniformId = 27,
     SpvDecorationSaturatedConversion = 28,
     SpvDecorationStream = 29,
     SpvDecorationLocation = 30,
@@ -441,11 +463,17 @@ typedef enum SpvDecoration_ {
     SpvDecorationPerViewNV = 5272,
     SpvDecorationPerTaskNV = 5273,
     SpvDecorationPerVertexNV = 5285,
+    SpvDecorationNonUniform = 5300,
     SpvDecorationNonUniformEXT = 5300,
+    SpvDecorationRestrictPointer = 5355,
     SpvDecorationRestrictPointerEXT = 5355,
+    SpvDecorationAliasedPointer = 5356,
     SpvDecorationAliasedPointerEXT = 5356,
+    SpvDecorationCounterBuffer = 5634,
     SpvDecorationHlslCounterBufferGOOGLE = 5634,
     SpvDecorationHlslSemanticGOOGLE = 5635,
+    SpvDecorationUserSemantic = 5635,
+    SpvDecorationUserTypeGOOGLE = 5636,
     SpvDecorationMax = 0x7fffffff,
 } SpvDecoration;
 
@@ -548,6 +576,10 @@ typedef enum SpvBuiltIn_ {
     SpvBuiltInHitTNV = 5332,
     SpvBuiltInHitKindNV = 5333,
     SpvBuiltInIncomingRayFlagsNV = 5351,
+    SpvBuiltInWarpsPerSMNV = 5374,
+    SpvBuiltInSMCountNV = 5375,
+    SpvBuiltInWarpIDNV = 5376,
+    SpvBuiltInSMIDNV = 5377,
     SpvBuiltInMax = 0x7fffffff,
 } SpvBuiltIn;
 
@@ -568,6 +600,11 @@ typedef enum SpvLoopControlShift_ {
     SpvLoopControlDontUnrollShift = 1,
     SpvLoopControlDependencyInfiniteShift = 2,
     SpvLoopControlDependencyLengthShift = 3,
+    SpvLoopControlMinIterationsShift = 4,
+    SpvLoopControlMaxIterationsShift = 5,
+    SpvLoopControlIterationMultipleShift = 6,
+    SpvLoopControlPeelCountShift = 7,
+    SpvLoopControlPartialCountShift = 8,
     SpvLoopControlMax = 0x7fffffff,
 } SpvLoopControlShift;
 
@@ -577,6 +614,11 @@ typedef enum SpvLoopControlMask_ {
     SpvLoopControlDontUnrollMask = 0x00000002,
     SpvLoopControlDependencyInfiniteMask = 0x00000004,
     SpvLoopControlDependencyLengthMask = 0x00000008,
+    SpvLoopControlMinIterationsMask = 0x00000010,
+    SpvLoopControlMaxIterationsMask = 0x00000020,
+    SpvLoopControlIterationMultipleMask = 0x00000040,
+    SpvLoopControlPeelCountMask = 0x00000080,
+    SpvLoopControlPartialCountMask = 0x00000100,
 } SpvLoopControlMask;
 
 typedef enum SpvFunctionControlShift_ {
@@ -606,9 +648,13 @@ typedef enum SpvMemorySemanticsShift_ {
     SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
     SpvMemorySemanticsAtomicCounterMemoryShift = 10,
     SpvMemorySemanticsImageMemoryShift = 11,
+    SpvMemorySemanticsOutputMemoryShift = 12,
     SpvMemorySemanticsOutputMemoryKHRShift = 12,
+    SpvMemorySemanticsMakeAvailableShift = 13,
     SpvMemorySemanticsMakeAvailableKHRShift = 13,
+    SpvMemorySemanticsMakeVisibleShift = 14,
     SpvMemorySemanticsMakeVisibleKHRShift = 14,
+    SpvMemorySemanticsVolatileShift = 15,
     SpvMemorySemanticsMax = 0x7fffffff,
 } SpvMemorySemanticsShift;
 
@@ -624,17 +670,24 @@ typedef enum SpvMemorySemanticsMask_ {
     SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
     SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
     SpvMemorySemanticsImageMemoryMask = 0x00000800,
+    SpvMemorySemanticsOutputMemoryMask = 0x00001000,
     SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000,
+    SpvMemorySemanticsMakeAvailableMask = 0x00002000,
     SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000,
+    SpvMemorySemanticsMakeVisibleMask = 0x00004000,
     SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000,
+    SpvMemorySemanticsVolatileMask = 0x00008000,
 } SpvMemorySemanticsMask;
 
 typedef enum SpvMemoryAccessShift_ {
     SpvMemoryAccessVolatileShift = 0,
     SpvMemoryAccessAlignedShift = 1,
     SpvMemoryAccessNontemporalShift = 2,
+    SpvMemoryAccessMakePointerAvailableShift = 3,
     SpvMemoryAccessMakePointerAvailableKHRShift = 3,
+    SpvMemoryAccessMakePointerVisibleShift = 4,
     SpvMemoryAccessMakePointerVisibleKHRShift = 4,
+    SpvMemoryAccessNonPrivatePointerShift = 5,
     SpvMemoryAccessNonPrivatePointerKHRShift = 5,
     SpvMemoryAccessMax = 0x7fffffff,
 } SpvMemoryAccessShift;
@@ -644,8 +697,11 @@ typedef enum SpvMemoryAccessMask_ {
     SpvMemoryAccessVolatileMask = 0x00000001,
     SpvMemoryAccessAlignedMask = 0x00000002,
     SpvMemoryAccessNontemporalMask = 0x00000004,
+    SpvMemoryAccessMakePointerAvailableMask = 0x00000008,
     SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008,
+    SpvMemoryAccessMakePointerVisibleMask = 0x00000010,
     SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010,
+    SpvMemoryAccessNonPrivatePointerMask = 0x00000020,
     SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020,
 } SpvMemoryAccessMask;
 
@@ -655,6 +711,7 @@ typedef enum SpvScope_ {
     SpvScopeWorkgroup = 2,
     SpvScopeSubgroup = 3,
     SpvScopeInvocation = 4,
+    SpvScopeQueueFamily = 5,
     SpvScopeQueueFamilyKHR = 5,
     SpvScopeMax = 0x7fffffff,
 } SpvScope;
@@ -755,6 +812,8 @@ typedef enum SpvCapability_ {
     SpvCapabilityGroupNonUniformShuffleRelative = 66,
     SpvCapabilityGroupNonUniformClustered = 67,
     SpvCapabilityGroupNonUniformQuad = 68,
+    SpvCapabilityShaderLayer = 69,
+    SpvCapabilityShaderViewportIndex = 70,
     SpvCapabilitySubgroupBallotKHR = 4423,
     SpvCapabilityDrawParameters = 4427,
     SpvCapabilitySubgroupVoteKHR = 4431,
@@ -783,6 +842,7 @@ typedef enum SpvCapability_ {
     SpvCapabilityFragmentMaskAMD = 5010,
     SpvCapabilityStencilExportEXT = 5013,
     SpvCapabilityImageReadWriteLodAMD = 5015,
+    SpvCapabilityShaderClockKHR = 5055,
     SpvCapabilitySampleMaskOverrideCoverageNV = 5249,
     SpvCapabilityGeometryShaderPassthroughNV = 5251,
     SpvCapabilityShaderViewportIndexLayerEXT = 5254,
@@ -798,28 +858,52 @@ typedef enum SpvCapability_ {
     SpvCapabilityFragmentDensityEXT = 5291,
     SpvCapabilityShadingRateNV = 5291,
     SpvCapabilityGroupNonUniformPartitionedNV = 5297,
+    SpvCapabilityShaderNonUniform = 5301,
     SpvCapabilityShaderNonUniformEXT = 5301,
+    SpvCapabilityRuntimeDescriptorArray = 5302,
     SpvCapabilityRuntimeDescriptorArrayEXT = 5302,
+    SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303,
     SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303,
+    SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304,
     SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304,
+    SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305,
     SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305,
+    SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306,
     SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306,
+    SpvCapabilitySampledImageArrayNonUniformIndexing = 5307,
     SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307,
+    SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308,
     SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308,
+    SpvCapabilityStorageImageArrayNonUniformIndexing = 5309,
     SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309,
+    SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310,
     SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310,
+    SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311,
     SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311,
+    SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312,
     SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312,
     SpvCapabilityRayTracingNV = 5340,
+    SpvCapabilityVulkanMemoryModel = 5345,
     SpvCapabilityVulkanMemoryModelKHR = 5345,
+    SpvCapabilityVulkanMemoryModelDeviceScope = 5346,
     SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
+    SpvCapabilityPhysicalStorageBufferAddresses = 5347,
     SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347,
     SpvCapabilityComputeDerivativeGroupLinearNV = 5350,
     SpvCapabilityCooperativeMatrixNV = 5357,
+    SpvCapabilityFragmentShaderSampleInterlockEXT = 5363,
+    SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372,
+    SpvCapabilityShaderSMBuiltinsNV = 5373,
+    SpvCapabilityFragmentShaderPixelInterlockEXT = 5378,
+    SpvCapabilityDemoteToHelperInvocationEXT = 5379,
     SpvCapabilitySubgroupShuffleINTEL = 5568,
     SpvCapabilitySubgroupBufferBlockIOINTEL = 5569,
     SpvCapabilitySubgroupImageBlockIOINTEL = 5570,
     SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579,
+    SpvCapabilityIntegerFunctions2INTEL = 5584,
+    SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696,
+    SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697,
+    SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698,
     SpvCapabilityMax = 0x7fffffff,
 } SpvCapability;
 
@@ -1164,6 +1248,10 @@ typedef enum SpvOp_ {
     SpvOpGroupNonUniformLogicalXor = 364,
     SpvOpGroupNonUniformQuadBroadcast = 365,
     SpvOpGroupNonUniformQuadSwap = 366,
+    SpvOpCopyLogical = 400,
+    SpvOpPtrEqual = 401,
+    SpvOpPtrNotEqual = 402,
+    SpvOpPtrDiff = 403,
     SpvOpSubgroupBallotKHR = 4421,
     SpvOpSubgroupFirstInvocationKHR = 4422,
     SpvOpSubgroupAllKHR = 4428,
@@ -1180,6 +1268,7 @@ typedef enum SpvOp_ {
     SpvOpGroupSMaxNonUniformAMD = 5007,
     SpvOpFragmentMaskFetchAMD = 5011,
     SpvOpFragmentFetchAMD = 5012,
+    SpvOpReadClockKHR = 5056,
     SpvOpImageSampleFootprintNV = 5283,
     SpvOpGroupNonUniformPartitionNV = 5296,
     SpvOpWritePackedPrimitiveIndices4x8NV = 5299,
@@ -1194,6 +1283,10 @@ typedef enum SpvOp_ {
     SpvOpCooperativeMatrixStoreNV = 5360,
     SpvOpCooperativeMatrixMulAddNV = 5361,
     SpvOpCooperativeMatrixLengthNV = 5362,
+    SpvOpBeginInvocationInterlockEXT = 5364,
+    SpvOpEndInvocationInterlockEXT = 5365,
+    SpvOpDemoteToHelperInvocationEXT = 5380,
+    SpvOpIsHelperInvocationEXT = 5381,
     SpvOpSubgroupShuffleINTEL = 5571,
     SpvOpSubgroupShuffleDownINTEL = 5572,
     SpvOpSubgroupShuffleUpINTEL = 5573,
@@ -1204,10 +1297,676 @@ typedef enum SpvOp_ {
     SpvOpSubgroupImageBlockWriteINTEL = 5578,
     SpvOpSubgroupImageMediaBlockReadINTEL = 5580,
     SpvOpSubgroupImageMediaBlockWriteINTEL = 5581,
+    SpvOpUCountLeadingZerosINTEL = 5585,
+    SpvOpUCountTrailingZerosINTEL = 5586,
+    SpvOpAbsISubINTEL = 5587,
+    SpvOpAbsUSubINTEL = 5588,
+    SpvOpIAddSatINTEL = 5589,
+    SpvOpUAddSatINTEL = 5590,
+    SpvOpIAverageINTEL = 5591,
+    SpvOpUAverageINTEL = 5592,
+    SpvOpIAverageRoundedINTEL = 5593,
+    SpvOpUAverageRoundedINTEL = 5594,
+    SpvOpISubSatINTEL = 5595,
+    SpvOpUSubSatINTEL = 5596,
+    SpvOpIMul32x16INTEL = 5597,
+    SpvOpUMul32x16INTEL = 5598,
+    SpvOpDecorateString = 5632,
     SpvOpDecorateStringGOOGLE = 5632,
+    SpvOpMemberDecorateString = 5633,
     SpvOpMemberDecorateStringGOOGLE = 5633,
+    SpvOpVmeImageINTEL = 5699,
+    SpvOpTypeVmeImageINTEL = 5700,
+    SpvOpTypeAvcImePayloadINTEL = 5701,
+    SpvOpTypeAvcRefPayloadINTEL = 5702,
+    SpvOpTypeAvcSicPayloadINTEL = 5703,
+    SpvOpTypeAvcMcePayloadINTEL = 5704,
+    SpvOpTypeAvcMceResultINTEL = 5705,
+    SpvOpTypeAvcImeResultINTEL = 5706,
+    SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707,
+    SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708,
+    SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709,
+    SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710,
+    SpvOpTypeAvcRefResultINTEL = 5711,
+    SpvOpTypeAvcSicResultINTEL = 5712,
+    SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713,
+    SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714,
+    SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715,
+    SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716,
+    SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717,
+    SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718,
+    SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719,
+    SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720,
+    SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721,
+    SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722,
+    SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723,
+    SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724,
+    SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725,
+    SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726,
+    SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727,
+    SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728,
+    SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729,
+    SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730,
+    SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731,
+    SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732,
+    SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733,
+    SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734,
+    SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735,
+    SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736,
+    SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737,
+    SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738,
+    SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739,
+    SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740,
+    SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741,
+    SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742,
+    SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743,
+    SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744,
+    SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745,
+    SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746,
+    SpvOpSubgroupAvcImeInitializeINTEL = 5747,
+    SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748,
+    SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749,
+    SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750,
+    SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751,
+    SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752,
+    SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753,
+    SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754,
+    SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755,
+    SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756,
+    SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757,
+    SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758,
+    SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759,
+    SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760,
+    SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761,
+    SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762,
+    SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763,
+    SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764,
+    SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765,
+    SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766,
+    SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767,
+    SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768,
+    SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769,
+    SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770,
+    SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771,
+    SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772,
+    SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773,
+    SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774,
+    SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775,
+    SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776,
+    SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777,
+    SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778,
+    SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779,
+    SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780,
+    SpvOpSubgroupAvcFmeInitializeINTEL = 5781,
+    SpvOpSubgroupAvcBmeInitializeINTEL = 5782,
+    SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783,
+    SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784,
+    SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785,
+    SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786,
+    SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787,
+    SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788,
+    SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789,
+    SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790,
+    SpvOpSubgroupAvcSicInitializeINTEL = 5791,
+    SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792,
+    SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793,
+    SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794,
+    SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795,
+    SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796,
+    SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797,
+    SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798,
+    SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799,
+    SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800,
+    SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801,
+    SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802,
+    SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803,
+    SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804,
+    SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805,
+    SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806,
+    SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807,
+    SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808,
+    SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809,
+    SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810,
+    SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811,
+    SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812,
+    SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813,
+    SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814,
+    SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815,
+    SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816,
     SpvOpMax = 0x7fffffff,
 } SpvOp;
 
+#ifdef SPV_ENABLE_UTILITY_CODE
+inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) {
+    *hasResult = *hasResultType = false;
+    switch (opcode) {
+    default: /* unknown opcode */ break;
+    case SpvOpNop: *hasResult = false; *hasResultType = false; break;
+    case SpvOpUndef: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSource: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break;
+    case SpvOpName: *hasResult = false; *hasResultType = false; break;
+    case SpvOpMemberName: *hasResult = false; *hasResultType = false; break;
+    case SpvOpString: *hasResult = true; *hasResultType = false; break;
+    case SpvOpLine: *hasResult = false; *hasResultType = false; break;
+    case SpvOpExtension: *hasResult = false; *hasResultType = false; break;
+    case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break;
+    case SpvOpExtInst: *hasResult = true; *hasResultType = true; break;
+    case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break;
+    case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break;
+    case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break;
+    case SpvOpCapability: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeInt: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break;
+    case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConstant: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFunction: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break;
+    case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break;
+    case SpvOpVariable: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLoad: *hasResult = true; *hasResultType = true; break;
+    case SpvOpStore: *hasResult = false; *hasResultType = false; break;
+    case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break;
+    case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break;
+    case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break;
+    case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break;
+    case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break;
+    case SpvOpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDecorate: *hasResult = false; *hasResultType = false; break;
+    case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break;
+    case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break;
+    case SpvOpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break;
+    case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTranspose: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageGather: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageDrefGather: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageRead: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break;
+    case SpvOpImage: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageQuerySize: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUConvert: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSConvert: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFConvert: *hasResult = true; *hasResultType = true; break;
+    case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break;
+    case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break;
+    case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitcast: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSNegate: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFNegate: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIAdd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFAdd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpISub: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFSub: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIMul: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFMul: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUDiv: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSDiv: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFDiv: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUMod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSRem: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSMod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFRem: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFMod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break;
+    case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDot: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break;
+    case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAny: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAll: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIsNan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIsInf: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break;
+    case SpvOpOrdered: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUnordered: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSelect: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpINotEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpULessThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFOrdNotEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break;
+    case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break;
+    case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpNot: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBitCount: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDPdx: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDPdy: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFwidth: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break;
+    case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break;
+    case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break;
+    case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break;
+    case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break;
+    case SpvOpControlBarrier: *hasResult = false; *hasResultType = false; break;
+    case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break;
+    case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break;
+    case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break;
+    case SpvOpPhi: *hasResult = true; *hasResultType = true; break;
+    case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break;
+    case SpvOpLabel: *hasResult = true; *hasResultType = false; break;
+    case SpvOpBranch: *hasResult = false; *hasResultType = false; break;
+    case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSwitch: *hasResult = false; *hasResultType = false; break;
+    case SpvOpKill: *hasResult = false; *hasResultType = false; break;
+    case SpvOpReturn: *hasResult = false; *hasResultType = false; break;
+    case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break;
+    case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break;
+    case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break;
+    case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGroupAll: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break;
+    case SpvOpWritePipe: *hasResult = true; *hasResultType = true; break;
+    case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break;
+    case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break;
+    case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+    case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+    case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+    case SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+    case SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break;
+    case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break;
+    case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break;
+    case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break;
+    case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSetUserEventStatus: *hasResult = false; *hasResultType = false; break;
+    case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break;
+    case SpvOpNoLine: *hasResult = false; *hasResultType = false; break;
+    case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break;
+    case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break;
+    case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break;
+    case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break;
+    case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break;
+    case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break;
+    case SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break;
+    case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break;
+    case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break;
+    case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
+    case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
+    case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
+    case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break;
+    case SpvOpExecuteCallableNV: *hasResult = false; *hasResultType = false; break;
+    case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break;
+    case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break;
+    case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break;
+    case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break;
+    case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+    case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+    case SpvOpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break;
+    case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break;
+    case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break;
+    case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break;
+    }
+}
+#endif /* SPV_ENABLE_UTILITY_CODE */
+
 #endif
 
diff --git a/src/3rdparty/SPIRV-Cross/spirv.hpp b/src/3rdparty/SPIRV-Cross/spirv.hpp
index adc13de..f5cbda1 100644
--- a/src/3rdparty/SPIRV-Cross/spirv.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv.hpp
@@ -49,12 +49,12 @@ namespace spv {
 
 typedef unsigned int Id;
 
-#define SPV_VERSION 0x10300
-#define SPV_REVISION 6
+#define SPV_VERSION 0x10500
+#define SPV_REVISION 1
 
 static const unsigned int MagicNumber = 0x07230203;
-static const unsigned int Version = 0x00010300;
-static const unsigned int Revision = 6;
+static const unsigned int Version = 0x00010400;
+static const unsigned int Revision = 1;
 static const unsigned int OpCodeMask = 0xffff;
 static const unsigned int WordCountShift = 16;
 
@@ -91,6 +91,7 @@ enum AddressingModel {
     AddressingModelLogical = 0,
     AddressingModelPhysical32 = 1,
     AddressingModelPhysical64 = 2,
+    AddressingModelPhysicalStorageBuffer64 = 5348,
     AddressingModelPhysicalStorageBuffer64EXT = 5348,
     AddressingModelMax = 0x7fffffff,
 };
@@ -99,6 +100,7 @@ enum MemoryModel {
     MemoryModelSimple = 0,
     MemoryModelGLSL450 = 1,
     MemoryModelOpenCL = 2,
+    MemoryModelVulkan = 3,
     MemoryModelVulkanKHR = 3,
     MemoryModelMax = 0x7fffffff,
 };
@@ -154,6 +156,12 @@ enum ExecutionMode {
     ExecutionModeDerivativeGroupQuadsNV = 5289,
     ExecutionModeDerivativeGroupLinearNV = 5290,
     ExecutionModeOutputTrianglesNV = 5298,
+    ExecutionModePixelInterlockOrderedEXT = 5366,
+    ExecutionModePixelInterlockUnorderedEXT = 5367,
+    ExecutionModeSampleInterlockOrderedEXT = 5368,
+    ExecutionModeSampleInterlockUnorderedEXT = 5369,
+    ExecutionModeShadingRateInterlockOrderedEXT = 5370,
+    ExecutionModeShadingRateInterlockUnorderedEXT = 5371,
     ExecutionModeMax = 0x7fffffff,
 };
 
@@ -177,6 +185,7 @@ enum StorageClass {
     StorageClassHitAttributeNV = 5339,
     StorageClassIncomingRayPayloadNV = 5342,
     StorageClassShaderRecordBufferNV = 5343,
+    StorageClassPhysicalStorageBuffer = 5349,
     StorageClassPhysicalStorageBufferEXT = 5349,
     StorageClassMax = 0x7fffffff,
 };
@@ -305,10 +314,16 @@ enum ImageOperandsShift {
     ImageOperandsConstOffsetsShift = 5,
     ImageOperandsSampleShift = 6,
     ImageOperandsMinLodShift = 7,
+    ImageOperandsMakeTexelAvailableShift = 8,
     ImageOperandsMakeTexelAvailableKHRShift = 8,
+    ImageOperandsMakeTexelVisibleShift = 9,
     ImageOperandsMakeTexelVisibleKHRShift = 9,
+    ImageOperandsNonPrivateTexelShift = 10,
     ImageOperandsNonPrivateTexelKHRShift = 10,
+    ImageOperandsVolatileTexelShift = 11,
     ImageOperandsVolatileTexelKHRShift = 11,
+    ImageOperandsSignExtendShift = 12,
+    ImageOperandsZeroExtendShift = 13,
     ImageOperandsMax = 0x7fffffff,
 };
 
@@ -322,10 +337,16 @@ enum ImageOperandsMask {
     ImageOperandsConstOffsetsMask = 0x00000020,
     ImageOperandsSampleMask = 0x00000040,
     ImageOperandsMinLodMask = 0x00000080,
+    ImageOperandsMakeTexelAvailableMask = 0x00000100,
     ImageOperandsMakeTexelAvailableKHRMask = 0x00000100,
+    ImageOperandsMakeTexelVisibleMask = 0x00000200,
     ImageOperandsMakeTexelVisibleKHRMask = 0x00000200,
+    ImageOperandsNonPrivateTexelMask = 0x00000400,
     ImageOperandsNonPrivateTexelKHRMask = 0x00000400,
+    ImageOperandsVolatileTexelMask = 0x00000800,
     ImageOperandsVolatileTexelKHRMask = 0x00000800,
+    ImageOperandsSignExtendMask = 0x00001000,
+    ImageOperandsZeroExtendMask = 0x00002000,
 };
 
 enum FPFastMathModeShift {
@@ -406,6 +427,7 @@ enum Decoration {
     DecorationNonWritable = 24,
     DecorationNonReadable = 25,
     DecorationUniform = 26,
+    DecorationUniformId = 27,
     DecorationSaturatedConversion = 28,
     DecorationStream = 29,
     DecorationLocation = 30,
@@ -437,11 +459,17 @@ enum Decoration {
     DecorationPerViewNV = 5272,
     DecorationPerTaskNV = 5273,
     DecorationPerVertexNV = 5285,
+    DecorationNonUniform = 5300,
     DecorationNonUniformEXT = 5300,
+    DecorationRestrictPointer = 5355,
     DecorationRestrictPointerEXT = 5355,
+    DecorationAliasedPointer = 5356,
     DecorationAliasedPointerEXT = 5356,
+    DecorationCounterBuffer = 5634,
     DecorationHlslCounterBufferGOOGLE = 5634,
     DecorationHlslSemanticGOOGLE = 5635,
+    DecorationUserSemantic = 5635,
+    DecorationUserTypeGOOGLE = 5636,
     DecorationMax = 0x7fffffff,
 };
 
@@ -544,6 +572,10 @@ enum BuiltIn {
     BuiltInHitTNV = 5332,
     BuiltInHitKindNV = 5333,
     BuiltInIncomingRayFlagsNV = 5351,
+    BuiltInWarpsPerSMNV = 5374,
+    BuiltInSMCountNV = 5375,
+    BuiltInWarpIDNV = 5376,
+    BuiltInSMIDNV = 5377,
     BuiltInMax = 0x7fffffff,
 };
 
@@ -564,6 +596,11 @@ enum LoopControlShift {
     LoopControlDontUnrollShift = 1,
     LoopControlDependencyInfiniteShift = 2,
     LoopControlDependencyLengthShift = 3,
+    LoopControlMinIterationsShift = 4,
+    LoopControlMaxIterationsShift = 5,
+    LoopControlIterationMultipleShift = 6,
+    LoopControlPeelCountShift = 7,
+    LoopControlPartialCountShift = 8,
     LoopControlMax = 0x7fffffff,
 };
 
@@ -573,6 +610,11 @@ enum LoopControlMask {
     LoopControlDontUnrollMask = 0x00000002,
     LoopControlDependencyInfiniteMask = 0x00000004,
     LoopControlDependencyLengthMask = 0x00000008,
+    LoopControlMinIterationsMask = 0x00000010,
+    LoopControlMaxIterationsMask = 0x00000020,
+    LoopControlIterationMultipleMask = 0x00000040,
+    LoopControlPeelCountMask = 0x00000080,
+    LoopControlPartialCountMask = 0x00000100,
 };
 
 enum FunctionControlShift {
@@ -602,9 +644,13 @@ enum MemorySemanticsShift {
     MemorySemanticsCrossWorkgroupMemoryShift = 9,
     MemorySemanticsAtomicCounterMemoryShift = 10,
     MemorySemanticsImageMemoryShift = 11,
+    MemorySemanticsOutputMemoryShift = 12,
     MemorySemanticsOutputMemoryKHRShift = 12,
+    MemorySemanticsMakeAvailableShift = 13,
     MemorySemanticsMakeAvailableKHRShift = 13,
+    MemorySemanticsMakeVisibleShift = 14,
     MemorySemanticsMakeVisibleKHRShift = 14,
+    MemorySemanticsVolatileShift = 15,
     MemorySemanticsMax = 0x7fffffff,
 };
 
@@ -620,17 +666,24 @@ enum MemorySemanticsMask {
     MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
     MemorySemanticsAtomicCounterMemoryMask = 0x00000400,
     MemorySemanticsImageMemoryMask = 0x00000800,
+    MemorySemanticsOutputMemoryMask = 0x00001000,
     MemorySemanticsOutputMemoryKHRMask = 0x00001000,
+    MemorySemanticsMakeAvailableMask = 0x00002000,
     MemorySemanticsMakeAvailableKHRMask = 0x00002000,
+    MemorySemanticsMakeVisibleMask = 0x00004000,
     MemorySemanticsMakeVisibleKHRMask = 0x00004000,
+    MemorySemanticsVolatileMask = 0x00008000,
 };
 
 enum MemoryAccessShift {
     MemoryAccessVolatileShift = 0,
     MemoryAccessAlignedShift = 1,
     MemoryAccessNontemporalShift = 2,
+    MemoryAccessMakePointerAvailableShift = 3,
     MemoryAccessMakePointerAvailableKHRShift = 3,
+    MemoryAccessMakePointerVisibleShift = 4,
     MemoryAccessMakePointerVisibleKHRShift = 4,
+    MemoryAccessNonPrivatePointerShift = 5,
     MemoryAccessNonPrivatePointerKHRShift = 5,
     MemoryAccessMax = 0x7fffffff,
 };
@@ -640,8 +693,11 @@ enum MemoryAccessMask {
     MemoryAccessVolatileMask = 0x00000001,
     MemoryAccessAlignedMask = 0x00000002,
     MemoryAccessNontemporalMask = 0x00000004,
+    MemoryAccessMakePointerAvailableMask = 0x00000008,
     MemoryAccessMakePointerAvailableKHRMask = 0x00000008,
+    MemoryAccessMakePointerVisibleMask = 0x00000010,
     MemoryAccessMakePointerVisibleKHRMask = 0x00000010,
+    MemoryAccessNonPrivatePointerMask = 0x00000020,
     MemoryAccessNonPrivatePointerKHRMask = 0x00000020,
 };
 
@@ -651,6 +707,7 @@ enum Scope {
     ScopeWorkgroup = 2,
     ScopeSubgroup = 3,
     ScopeInvocation = 4,
+    ScopeQueueFamily = 5,
     ScopeQueueFamilyKHR = 5,
     ScopeMax = 0x7fffffff,
 };
@@ -751,6 +808,8 @@ enum Capability {
     CapabilityGroupNonUniformShuffleRelative = 66,
     CapabilityGroupNonUniformClustered = 67,
     CapabilityGroupNonUniformQuad = 68,
+    CapabilityShaderLayer = 69,
+    CapabilityShaderViewportIndex = 70,
     CapabilitySubgroupBallotKHR = 4423,
     CapabilityDrawParameters = 4427,
     CapabilitySubgroupVoteKHR = 4431,
@@ -779,6 +838,7 @@ enum Capability {
     CapabilityFragmentMaskAMD = 5010,
     CapabilityStencilExportEXT = 5013,
     CapabilityImageReadWriteLodAMD = 5015,
+    CapabilityShaderClockKHR = 5055,
     CapabilitySampleMaskOverrideCoverageNV = 5249,
     CapabilityGeometryShaderPassthroughNV = 5251,
     CapabilityShaderViewportIndexLayerEXT = 5254,
@@ -794,27 +854,52 @@ enum Capability {
     CapabilityFragmentDensityEXT = 5291,
     CapabilityShadingRateNV = 5291,
     CapabilityGroupNonUniformPartitionedNV = 5297,
+    CapabilityShaderNonUniform = 5301,
     CapabilityShaderNonUniformEXT = 5301,
+    CapabilityRuntimeDescriptorArray = 5302,
     CapabilityRuntimeDescriptorArrayEXT = 5302,
+    CapabilityInputAttachmentArrayDynamicIndexing = 5303,
     CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303,
+    CapabilityUniformTexelBufferArrayDynamicIndexing = 5304,
     CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304,
+    CapabilityStorageTexelBufferArrayDynamicIndexing = 5305,
     CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305,
+    CapabilityUniformBufferArrayNonUniformIndexing = 5306,
     CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306,
+    CapabilitySampledImageArrayNonUniformIndexing = 5307,
     CapabilitySampledImageArrayNonUniformIndexingEXT = 5307,
+    CapabilityStorageBufferArrayNonUniformIndexing = 5308,
     CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308,
+    CapabilityStorageImageArrayNonUniformIndexing = 5309,
     CapabilityStorageImageArrayNonUniformIndexingEXT = 5309,
+    CapabilityInputAttachmentArrayNonUniformIndexing = 5310,
     CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310,
+    CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311,
     CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311,
+    CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312,
     CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312,
     CapabilityRayTracingNV = 5340,
+    CapabilityVulkanMemoryModel = 5345,
     CapabilityVulkanMemoryModelKHR = 5345,
+    CapabilityVulkanMemoryModelDeviceScope = 5346,
     CapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
+    CapabilityPhysicalStorageBufferAddresses = 5347,
     CapabilityPhysicalStorageBufferAddressesEXT = 5347,
     CapabilityComputeDerivativeGroupLinearNV = 5350,
+    CapabilityCooperativeMatrixNV = 5357,
+    CapabilityFragmentShaderSampleInterlockEXT = 5363,
+    CapabilityFragmentShaderShadingRateInterlockEXT = 5372,
+    CapabilityShaderSMBuiltinsNV = 5373,
+    CapabilityFragmentShaderPixelInterlockEXT = 5378,
+    CapabilityDemoteToHelperInvocationEXT = 5379,
     CapabilitySubgroupShuffleINTEL = 5568,
     CapabilitySubgroupBufferBlockIOINTEL = 5569,
     CapabilitySubgroupImageBlockIOINTEL = 5570,
     CapabilitySubgroupImageMediaBlockIOINTEL = 5579,
+    CapabilityIntegerFunctions2INTEL = 5584,
+    CapabilitySubgroupAvcMotionEstimationINTEL = 5696,
+    CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697,
+    CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698,
     CapabilityMax = 0x7fffffff,
 };
 
@@ -1159,6 +1244,10 @@ enum Op {
     OpGroupNonUniformLogicalXor = 364,
     OpGroupNonUniformQuadBroadcast = 365,
     OpGroupNonUniformQuadSwap = 366,
+    OpCopyLogical = 400,
+    OpPtrEqual = 401,
+    OpPtrNotEqual = 402,
+    OpPtrDiff = 403,
     OpSubgroupBallotKHR = 4421,
     OpSubgroupFirstInvocationKHR = 4422,
     OpSubgroupAllKHR = 4428,
@@ -1175,6 +1264,7 @@ enum Op {
     OpGroupSMaxNonUniformAMD = 5007,
     OpFragmentMaskFetchAMD = 5011,
     OpFragmentFetchAMD = 5012,
+    OpReadClockKHR = 5056,
     OpImageSampleFootprintNV = 5283,
     OpGroupNonUniformPartitionNV = 5296,
     OpWritePackedPrimitiveIndices4x8NV = 5299,
@@ -1184,6 +1274,15 @@ enum Op {
     OpTraceNV = 5337,
     OpTypeAccelerationStructureNV = 5341,
     OpExecuteCallableNV = 5344,
+    OpTypeCooperativeMatrixNV = 5358,
+    OpCooperativeMatrixLoadNV = 5359,
+    OpCooperativeMatrixStoreNV = 5360,
+    OpCooperativeMatrixMulAddNV = 5361,
+    OpCooperativeMatrixLengthNV = 5362,
+    OpBeginInvocationInterlockEXT = 5364,
+    OpEndInvocationInterlockEXT = 5365,
+    OpDemoteToHelperInvocationEXT = 5380,
+    OpIsHelperInvocationEXT = 5381,
     OpSubgroupShuffleINTEL = 5571,
     OpSubgroupShuffleDownINTEL = 5572,
     OpSubgroupShuffleUpINTEL = 5573,
@@ -1194,11 +1293,677 @@ enum Op {
     OpSubgroupImageBlockWriteINTEL = 5578,
     OpSubgroupImageMediaBlockReadINTEL = 5580,
     OpSubgroupImageMediaBlockWriteINTEL = 5581,
+    OpUCountLeadingZerosINTEL = 5585,
+    OpUCountTrailingZerosINTEL = 5586,
+    OpAbsISubINTEL = 5587,
+    OpAbsUSubINTEL = 5588,
+    OpIAddSatINTEL = 5589,
+    OpUAddSatINTEL = 5590,
+    OpIAverageINTEL = 5591,
+    OpUAverageINTEL = 5592,
+    OpIAverageRoundedINTEL = 5593,
+    OpUAverageRoundedINTEL = 5594,
+    OpISubSatINTEL = 5595,
+    OpUSubSatINTEL = 5596,
+    OpIMul32x16INTEL = 5597,
+    OpUMul32x16INTEL = 5598,
+    OpDecorateString = 5632,
     OpDecorateStringGOOGLE = 5632,
+    OpMemberDecorateString = 5633,
     OpMemberDecorateStringGOOGLE = 5633,
+    OpVmeImageINTEL = 5699,
+    OpTypeVmeImageINTEL = 5700,
+    OpTypeAvcImePayloadINTEL = 5701,
+    OpTypeAvcRefPayloadINTEL = 5702,
+    OpTypeAvcSicPayloadINTEL = 5703,
+    OpTypeAvcMcePayloadINTEL = 5704,
+    OpTypeAvcMceResultINTEL = 5705,
+    OpTypeAvcImeResultINTEL = 5706,
+    OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707,
+    OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708,
+    OpTypeAvcImeSingleReferenceStreaminINTEL = 5709,
+    OpTypeAvcImeDualReferenceStreaminINTEL = 5710,
+    OpTypeAvcRefResultINTEL = 5711,
+    OpTypeAvcSicResultINTEL = 5712,
+    OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713,
+    OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714,
+    OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715,
+    OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716,
+    OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717,
+    OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718,
+    OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719,
+    OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720,
+    OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721,
+    OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722,
+    OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723,
+    OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724,
+    OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725,
+    OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726,
+    OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727,
+    OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728,
+    OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729,
+    OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730,
+    OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731,
+    OpSubgroupAvcMceConvertToImePayloadINTEL = 5732,
+    OpSubgroupAvcMceConvertToImeResultINTEL = 5733,
+    OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734,
+    OpSubgroupAvcMceConvertToRefResultINTEL = 5735,
+    OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736,
+    OpSubgroupAvcMceConvertToSicResultINTEL = 5737,
+    OpSubgroupAvcMceGetMotionVectorsINTEL = 5738,
+    OpSubgroupAvcMceGetInterDistortionsINTEL = 5739,
+    OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740,
+    OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741,
+    OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742,
+    OpSubgroupAvcMceGetInterDirectionsINTEL = 5743,
+    OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744,
+    OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745,
+    OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746,
+    OpSubgroupAvcImeInitializeINTEL = 5747,
+    OpSubgroupAvcImeSetSingleReferenceINTEL = 5748,
+    OpSubgroupAvcImeSetDualReferenceINTEL = 5749,
+    OpSubgroupAvcImeRefWindowSizeINTEL = 5750,
+    OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751,
+    OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752,
+    OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753,
+    OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754,
+    OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755,
+    OpSubgroupAvcImeSetWeightedSadINTEL = 5756,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757,
+    OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762,
+    OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763,
+    OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764,
+    OpSubgroupAvcImeConvertToMceResultINTEL = 5765,
+    OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766,
+    OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767,
+    OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768,
+    OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771,
+    OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774,
+    OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775,
+    OpSubgroupAvcImeGetBorderReachedINTEL = 5776,
+    OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777,
+    OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778,
+    OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779,
+    OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780,
+    OpSubgroupAvcFmeInitializeINTEL = 5781,
+    OpSubgroupAvcBmeInitializeINTEL = 5782,
+    OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783,
+    OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784,
+    OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785,
+    OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786,
+    OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787,
+    OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788,
+    OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789,
+    OpSubgroupAvcRefConvertToMceResultINTEL = 5790,
+    OpSubgroupAvcSicInitializeINTEL = 5791,
+    OpSubgroupAvcSicConfigureSkcINTEL = 5792,
+    OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793,
+    OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794,
+    OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795,
+    OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796,
+    OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797,
+    OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798,
+    OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799,
+    OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800,
+    OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801,
+    OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802,
+    OpSubgroupAvcSicEvaluateIpeINTEL = 5803,
+    OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804,
+    OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805,
+    OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806,
+    OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807,
+    OpSubgroupAvcSicConvertToMceResultINTEL = 5808,
+    OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809,
+    OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810,
+    OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811,
+    OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812,
+    OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813,
+    OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814,
+    OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815,
+    OpSubgroupAvcSicGetInterRawSadsINTEL = 5816,
     OpMax = 0x7fffffff,
 };
 
+#ifdef SPV_ENABLE_UTILITY_CODE
+inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
+    *hasResult = *hasResultType = false;
+    switch (opcode) {
+    default: /* unknown opcode */ break;
+    case OpNop: *hasResult = false; *hasResultType = false; break;
+    case OpUndef: *hasResult = true; *hasResultType = true; break;
+    case OpSourceContinued: *hasResult = false; *hasResultType = false; break;
+    case OpSource: *hasResult = false; *hasResultType = false; break;
+    case OpSourceExtension: *hasResult = false; *hasResultType = false; break;
+    case OpName: *hasResult = false; *hasResultType = false; break;
+    case OpMemberName: *hasResult = false; *hasResultType = false; break;
+    case OpString: *hasResult = true; *hasResultType = false; break;
+    case OpLine: *hasResult = false; *hasResultType = false; break;
+    case OpExtension: *hasResult = false; *hasResultType = false; break;
+    case OpExtInstImport: *hasResult = true; *hasResultType = false; break;
+    case OpExtInst: *hasResult = true; *hasResultType = true; break;
+    case OpMemoryModel: *hasResult = false; *hasResultType = false; break;
+    case OpEntryPoint: *hasResult = false; *hasResultType = false; break;
+    case OpExecutionMode: *hasResult = false; *hasResultType = false; break;
+    case OpCapability: *hasResult = false; *hasResultType = false; break;
+    case OpTypeVoid: *hasResult = true; *hasResultType = false; break;
+    case OpTypeBool: *hasResult = true; *hasResultType = false; break;
+    case OpTypeInt: *hasResult = true; *hasResultType = false; break;
+    case OpTypeFloat: *hasResult = true; *hasResultType = false; break;
+    case OpTypeVector: *hasResult = true; *hasResultType = false; break;
+    case OpTypeMatrix: *hasResult = true; *hasResultType = false; break;
+    case OpTypeImage: *hasResult = true; *hasResultType = false; break;
+    case OpTypeSampler: *hasResult = true; *hasResultType = false; break;
+    case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break;
+    case OpTypeArray: *hasResult = true; *hasResultType = false; break;
+    case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break;
+    case OpTypeStruct: *hasResult = true; *hasResultType = false; break;
+    case OpTypeOpaque: *hasResult = true; *hasResultType = false; break;
+    case OpTypePointer: *hasResult = true; *hasResultType = false; break;
+    case OpTypeFunction: *hasResult = true; *hasResultType = false; break;
+    case OpTypeEvent: *hasResult = true; *hasResultType = false; break;
+    case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break;
+    case OpTypeReserveId: *hasResult = true; *hasResultType = false; break;
+    case OpTypeQueue: *hasResult = true; *hasResultType = false; break;
+    case OpTypePipe: *hasResult = true; *hasResultType = false; break;
+    case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break;
+    case OpConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case OpConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case OpConstant: *hasResult = true; *hasResultType = true; break;
+    case OpConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case OpConstantSampler: *hasResult = true; *hasResultType = true; break;
+    case OpConstantNull: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstant: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break;
+    case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break;
+    case OpFunction: *hasResult = true; *hasResultType = true; break;
+    case OpFunctionParameter: *hasResult = true; *hasResultType = true; break;
+    case OpFunctionEnd: *hasResult = false; *hasResultType = false; break;
+    case OpFunctionCall: *hasResult = true; *hasResultType = true; break;
+    case OpVariable: *hasResult = true; *hasResultType = true; break;
+    case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break;
+    case OpLoad: *hasResult = true; *hasResultType = true; break;
+    case OpStore: *hasResult = false; *hasResultType = false; break;
+    case OpCopyMemory: *hasResult = false; *hasResultType = false; break;
+    case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break;
+    case OpAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpArrayLength: *hasResult = true; *hasResultType = true; break;
+    case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break;
+    case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+    case OpDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpDecorationGroup: *hasResult = true; *hasResultType = false; break;
+    case OpGroupDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break;
+    case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break;
+    case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break;
+    case OpVectorShuffle: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeExtract: *hasResult = true; *hasResultType = true; break;
+    case OpCompositeInsert: *hasResult = true; *hasResultType = true; break;
+    case OpCopyObject: *hasResult = true; *hasResultType = true; break;
+    case OpTranspose: *hasResult = true; *hasResultType = true; break;
+    case OpSampledImage: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageFetch: *hasResult = true; *hasResultType = true; break;
+    case OpImageGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageDrefGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageRead: *hasResult = true; *hasResultType = true; break;
+    case OpImageWrite: *hasResult = false; *hasResultType = false; break;
+    case OpImage: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySize: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break;
+    case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break;
+    case OpConvertFToU: *hasResult = true; *hasResultType = true; break;
+    case OpConvertFToS: *hasResult = true; *hasResultType = true; break;
+    case OpConvertSToF: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToF: *hasResult = true; *hasResultType = true; break;
+    case OpUConvert: *hasResult = true; *hasResultType = true; break;
+    case OpSConvert: *hasResult = true; *hasResultType = true; break;
+    case OpFConvert: *hasResult = true; *hasResultType = true; break;
+    case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break;
+    case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break;
+    case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break;
+    case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break;
+    case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break;
+    case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break;
+    case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break;
+    case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break;
+    case OpBitcast: *hasResult = true; *hasResultType = true; break;
+    case OpSNegate: *hasResult = true; *hasResultType = true; break;
+    case OpFNegate: *hasResult = true; *hasResultType = true; break;
+    case OpIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpFAdd: *hasResult = true; *hasResultType = true; break;
+    case OpISub: *hasResult = true; *hasResultType = true; break;
+    case OpFSub: *hasResult = true; *hasResultType = true; break;
+    case OpIMul: *hasResult = true; *hasResultType = true; break;
+    case OpFMul: *hasResult = true; *hasResultType = true; break;
+    case OpUDiv: *hasResult = true; *hasResultType = true; break;
+    case OpSDiv: *hasResult = true; *hasResultType = true; break;
+    case OpFDiv: *hasResult = true; *hasResultType = true; break;
+    case OpUMod: *hasResult = true; *hasResultType = true; break;
+    case OpSRem: *hasResult = true; *hasResultType = true; break;
+    case OpSMod: *hasResult = true; *hasResultType = true; break;
+    case OpFRem: *hasResult = true; *hasResultType = true; break;
+    case OpFMod: *hasResult = true; *hasResultType = true; break;
+    case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break;
+    case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break;
+    case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break;
+    case OpOuterProduct: *hasResult = true; *hasResultType = true; break;
+    case OpDot: *hasResult = true; *hasResultType = true; break;
+    case OpIAddCarry: *hasResult = true; *hasResultType = true; break;
+    case OpISubBorrow: *hasResult = true; *hasResultType = true; break;
+    case OpUMulExtended: *hasResult = true; *hasResultType = true; break;
+    case OpSMulExtended: *hasResult = true; *hasResultType = true; break;
+    case OpAny: *hasResult = true; *hasResultType = true; break;
+    case OpAll: *hasResult = true; *hasResultType = true; break;
+    case OpIsNan: *hasResult = true; *hasResultType = true; break;
+    case OpIsInf: *hasResult = true; *hasResultType = true; break;
+    case OpIsFinite: *hasResult = true; *hasResultType = true; break;
+    case OpIsNormal: *hasResult = true; *hasResultType = true; break;
+    case OpSignBitSet: *hasResult = true; *hasResultType = true; break;
+    case OpLessOrGreater: *hasResult = true; *hasResultType = true; break;
+    case OpOrdered: *hasResult = true; *hasResultType = true; break;
+    case OpUnordered: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalEqual: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalOr: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalAnd: *hasResult = true; *hasResultType = true; break;
+    case OpLogicalNot: *hasResult = true; *hasResultType = true; break;
+    case OpSelect: *hasResult = true; *hasResultType = true; break;
+    case OpIEqual: *hasResult = true; *hasResultType = true; break;
+    case OpINotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpUGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpSGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpULessThan: *hasResult = true; *hasResultType = true; break;
+    case OpSLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpULessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+    case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break;
+    case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break;
+    case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseOr: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseXor: *hasResult = true; *hasResultType = true; break;
+    case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+    case OpNot: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break;
+    case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break;
+    case OpBitReverse: *hasResult = true; *hasResultType = true; break;
+    case OpBitCount: *hasResult = true; *hasResultType = true; break;
+    case OpDPdx: *hasResult = true; *hasResultType = true; break;
+    case OpDPdy: *hasResult = true; *hasResultType = true; break;
+    case OpFwidth: *hasResult = true; *hasResultType = true; break;
+    case OpDPdxFine: *hasResult = true; *hasResultType = true; break;
+    case OpDPdyFine: *hasResult = true; *hasResultType = true; break;
+    case OpFwidthFine: *hasResult = true; *hasResultType = true; break;
+    case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break;
+    case OpEmitVertex: *hasResult = false; *hasResultType = false; break;
+    case OpEndPrimitive: *hasResult = false; *hasResultType = false; break;
+    case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break;
+    case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break;
+    case OpControlBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicLoad: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicStore: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicExchange: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicISub: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicSMin: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicUMin: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicSMax: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicUMax: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicAnd: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicOr: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicXor: *hasResult = true; *hasResultType = true; break;
+    case OpPhi: *hasResult = true; *hasResultType = true; break;
+    case OpLoopMerge: *hasResult = false; *hasResultType = false; break;
+    case OpSelectionMerge: *hasResult = false; *hasResultType = false; break;
+    case OpLabel: *hasResult = true; *hasResultType = false; break;
+    case OpBranch: *hasResult = false; *hasResultType = false; break;
+    case OpBranchConditional: *hasResult = false; *hasResultType = false; break;
+    case OpSwitch: *hasResult = false; *hasResultType = false; break;
+    case OpKill: *hasResult = false; *hasResultType = false; break;
+    case OpReturn: *hasResult = false; *hasResultType = false; break;
+    case OpReturnValue: *hasResult = false; *hasResultType = false; break;
+    case OpUnreachable: *hasResult = false; *hasResultType = false; break;
+    case OpLifetimeStart: *hasResult = false; *hasResultType = false; break;
+    case OpLifetimeStop: *hasResult = false; *hasResultType = false; break;
+    case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break;
+    case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break;
+    case OpGroupAll: *hasResult = true; *hasResultType = true; break;
+    case OpGroupAny: *hasResult = true; *hasResultType = true; break;
+    case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break;
+    case OpGroupIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMax: *hasResult = true; *hasResultType = true; break;
+    case OpReadPipe: *hasResult = true; *hasResultType = true; break;
+    case OpWritePipe: *hasResult = true; *hasResultType = true; break;
+    case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break;
+    case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break;
+    case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+    case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+    case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break;
+    case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+    case OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+    case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+    case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break;
+    case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break;
+    case OpRetainEvent: *hasResult = false; *hasResultType = false; break;
+    case OpReleaseEvent: *hasResult = false; *hasResultType = false; break;
+    case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break;
+    case OpIsValidEvent: *hasResult = true; *hasResultType = true; break;
+    case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break;
+    case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break;
+    case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break;
+    case OpBuildNDRange: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break;
+    case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break;
+    case OpNoLine: *hasResult = false; *hasResultType = false; break;
+    case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break;
+    case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break;
+    case OpImageSparseRead: *hasResult = true; *hasResultType = true; break;
+    case OpSizeOf: *hasResult = true; *hasResultType = true; break;
+    case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break;
+    case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break;
+    case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break;
+    case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break;
+    case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break;
+    case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break;
+    case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break;
+    case OpModuleProcessed: *hasResult = false; *hasResultType = false; break;
+    case OpExecutionModeId: *hasResult = false; *hasResultType = false; break;
+    case OpDecorateId: *hasResult = false; *hasResultType = false; break;
+    case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break;
+    case OpCopyLogical: *hasResult = true; *hasResultType = true; break;
+    case OpPtrEqual: *hasResult = true; *hasResultType = true; break;
+    case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break;
+    case OpPtrDiff: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break;
+    case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+    case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break;
+    case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
+    case OpReadClockKHR: *hasResult = true; *hasResultType = true; break;
+    case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+    case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
+    case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
+    case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;
+    case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break;
+    case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break;
+    case OpTraceNV: *hasResult = false; *hasResultType = false; break;
+    case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break;
+    case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break;
+    case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break;
+    case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break;
+    case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break;
+    case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break;
+    case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break;
+    case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+    case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+    case OpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break;
+    case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+    case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+    case OpDecorateString: *hasResult = false; *hasResultType = false; break;
+    case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break;
+    case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break;
+    case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+    case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break;
+    }
+}
+#endif /* SPV_ENABLE_UTILITY_CODE */
+
 // Overload operator| for mask bit combining
 
 inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); }
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp
index 2f3cf25..463c756 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp
@@ -61,7 +61,7 @@ void CFG::build_immediate_dominators()
 			if (immediate_dominators[block])
 			{
 				assert(immediate_dominators[edge]);
-				immediate_dominators[block] = find_common_dominator(block, edge);
+				immediate_dominators[block] = find_common_dominator(immediate_dominators[block], edge);
 			}
 			else
 				immediate_dominators[block] = edge;
@@ -74,8 +74,14 @@ bool CFG::is_back_edge(uint32_t to) const
 	// We have a back edge if the visit order is set with the temporary magic value 0.
 	// Crossing edges will have already been recorded with a visit order.
 	auto itr = visit_order.find(to);
-	assert(itr != end(visit_order));
-	return itr->second.get() == 0;
+	return itr != end(visit_order) && itr->second.get() == 0;
+}
+
+bool CFG::has_visited_forward_edge(uint32_t to) const
+{
+	// A visit order > 0 means "to" was already fully visited, so branching to it is not a back edge.
+	auto itr = visit_order.find(to); // Blocks without an entry in visit_order yield false.
+	return itr != end(visit_order) && itr->second.get() > 0; // 0 is the temporary "visit in progress" marker.
 }
 
 bool CFG::post_order_visit(uint32_t block_id)
@@ -83,14 +89,30 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// If we have already branched to this block (back edge), stop recursion.
 	// If our branches are back-edges, we do not record them.
 	// We have to record crossing edges however.
-	if (visit_order[block_id].get() >= 0)
-		return !is_back_edge(block_id);
+	if (has_visited_forward_edge(block_id))
+		return true;
+	else if (is_back_edge(block_id))
+		return false;
 
 	// Block back-edges from recursively revisiting ourselves.
 	visit_order[block_id].get() = 0;
 
-	// First visit our branch targets.
 	auto &block = compiler.get<SPIRBlock>(block_id);
+
+	// If this is a loop header, add an implied branch to the merge target.
+	// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
+	// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
+	// This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator.
+	// We could use has_visited_forward_edge, but this breaks code-gen where the merge block is unreachable in the CFG.
+
+	// Make a point out of visiting merge target first. This is to make sure that post visit order outside the loop
+	// is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis.
+	// For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine,
+	// but for loops, only the header might end up actually branching to merge block.
+	if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
+		add_branch(block_id, block.merge_block);
+
+	// First visit our branch targets.
 	switch (block.terminator)
 	{
 	case SPIRBlock::Direct:
@@ -119,12 +141,56 @@ bool CFG::post_order_visit(uint32_t block_id)
 		break;
 	}
 
-	// If this is a loop header, add an implied branch to the merge target.
-	// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
-	// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
-	// This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator.
-	if (block.merge == SPIRBlock::MergeLoop)
-		add_branch(block_id, block.merge_block);
+	// If this is a selection merge, add an implied branch to the merge target.
+	// This is needed to avoid cases where an inner branch dominates the outer branch.
+	// This can happen if one of the branches exits early, e.g.:
+	// if (cond) { ...; break; } else { var = 100 } use_var(var);
+	// We can use the variable without a Phi since there is only one possible parent here.
+	// However, in this case, we need to hoist out the inner variable to outside the branch.
+	// Use same strategy as loops.
+	if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
+	{
+		// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
+		// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
+		// will be hoisted out to outside the selection merge.
+		// If size > 1, the variable will be automatically hoisted, so we should not mess with it.
+		// The exception here is switch blocks, where we can have multiple edges to merge block,
+		// all coming from same scope, so be more conservative in this case.
+		// Adding fake branches unconditionally breaks parameter preservation analysis,
+		// which looks at how variables are accessed through the CFG.
+		auto pred_itr = preceding_edges.find(block.next_block);
+		if (pred_itr != end(preceding_edges))
+		{
+			auto &pred = pred_itr->second;
+			auto succ_itr = succeeding_edges.find(block_id);
+			size_t num_succeeding_edges = 0;
+			if (succ_itr != end(succeeding_edges))
+				num_succeeding_edges = succ_itr->second.size();
+
+			if (block.terminator == SPIRBlock::MultiSelect && num_succeeding_edges == 1)
+			{
+				// Multiple branches can come from the same scope due to "break;", so we need to assume that all branches
+				// come from same case scope in worst case, even if there are multiple preceding edges.
+				// If we have more than one succeeding edge from the block header, it should be impossible
+				// to have a dominator be inside the block.
+				// Only case this can go wrong is if we have 2 or more edges from block header and
+				// 2 or more edges to merge block, and still have dominator be inside a case label.
+				if (!pred.empty())
+					add_branch(block_id, block.next_block);
+			}
+			else
+			{
+				if (pred.size() == 1 && *pred.begin() != block_id)
+					add_branch(block_id, block.next_block);
+			}
+		}
+		else
+		{
+			// If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it.
+			// We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge.
+			add_branch(block_id, block.next_block);
+		}
+	}
 
 	// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
 	visit_order[block_id].get() = ++visit_count;
@@ -152,6 +218,111 @@ void CFG::add_branch(uint32_t from, uint32_t to)
 	add_unique(succeeding_edges[from], to);
 }
 
+uint32_t CFG::find_loop_dominator(uint32_t block_id) const // Walks preceding edges backwards until a loop header is reached.
+{
+	while (block_id != SPIRBlock::NoDominator)
+	{
+		auto itr = preceding_edges.find(block_id);
+		if (itr == end(preceding_edges))
+			return SPIRBlock::NoDominator; // No predecessors recorded for this block: no loop header was found.
+		if (itr->second.empty())
+			return SPIRBlock::NoDominator;
+
+		uint32_t pred_block_id = SPIRBlock::NoDominator; // NoDominator doubles as "no predecessor chosen yet".
+		bool ignore_loop_header = false; // Set when we step to a loop header from that loop's own merge block.
+
+		// If we are a merge block, go directly to the header block.
+		// Only consider a loop dominator if we are branching from inside a block to a loop header.
+		// NOTE: In the CFG we forced an edge from header to merge block always to support variable scopes properly.
+		for (auto &pred : itr->second)
+		{
+			auto &pred_block = compiler.get<SPIRBlock>(pred);
+			if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == ID(block_id))
+			{
+				pred_block_id = pred;
+				ignore_loop_header = true; // block_id is this loop's merge block, so do not report its header below.
+				break;
+			}
+			else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == ID(block_id))
+			{
+				pred_block_id = pred;
+				break;
+			}
+		}
+
+		// No merge block means we can just pick any edge. Loop headers dominate the inner loop, so any path we
+		// take will lead there.
+		if (pred_block_id == SPIRBlock::NoDominator)
+			pred_block_id = itr->second.front();
+
+		block_id = pred_block_id;
+
+		if (!ignore_loop_header && block_id) // A block ID of 0 is skipped rather than looked up.
+		{
+			auto &block = compiler.get<SPIRBlock>(block_id);
+			if (block.merge == SPIRBlock::MergeLoop)
+				return block_id;
+		}
+	}
+
+	return block_id; // Only reached once block_id == SPIRBlock::NoDominator.
+}
+
+bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const
+{
+	// Walk backwards, starting from "to" block.
+	// Only follow pred edges if they have a 1:1 relationship, or a merge relationship.
+	// If we cannot find a path to "from", we must assume that "to" is inside control flow in some way and return false.
+
+	auto &from_block = compiler.get<SPIRBlock>(from);
+	BlockID ignore_block_id = 0; // Merge block of "from" when "from" is a loop header; 0 disables the Select special case.
+	if (from_block.merge == SPIRBlock::MergeLoop)
+		ignore_block_id = from_block.merge_block;
+
+	while (to != from)
+	{
+		auto pred_itr = preceding_edges.find(to);
+		if (pred_itr == end(preceding_edges))
+			return false; // "to" has no recorded predecessors, so "from" cannot be reached.
+
+		DominatorBuilder builder(*this); // NOTE(review): presumably yields the common dominator of all added blocks - confirm.
+		for (auto &edge : pred_itr->second)
+			builder.add_block(edge);
+
+		uint32_t dominator = builder.get_dominator();
+		if (dominator == 0)
+			return false; // No dominator could be determined.
+
+		auto &dom = compiler.get<SPIRBlock>(dominator);
+
+		bool true_path_ignore = false; // Results of execution_is_branchless(true/false target, loop merge block).
+		bool false_path_ignore = false;
+		if (ignore_block_id && dom.terminator == SPIRBlock::Select)
+		{
+			auto &true_block = compiler.get<SPIRBlock>(dom.true_block);
+			auto &false_block = compiler.get<SPIRBlock>(dom.false_block);
+			auto &ignore_block = compiler.get<SPIRBlock>(ignore_block_id);
+			true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block);
+			false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block);
+		}
+
+		if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) ||
+		    (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) ||
+		    (dom.terminator == SPIRBlock::Direct && dom.next_block == to) ||
+		    (dom.terminator == SPIRBlock::Select && dom.true_block == to && false_path_ignore) ||
+		    (dom.terminator == SPIRBlock::Select && dom.false_block == to && true_path_ignore))
+		{
+			// Allow walking selection constructs if the other branch reaches out of a loop construct.
+			// It cannot be in-scope anymore.
+			to = dominator;
+		}
+		else
+			return false; // "to" is reached from its dominator in some other way: stop walking.
+	}
+
+	return true; // The backwards walk arrived at "from".
+}
+
 DominatorBuilder::DominatorBuilder(const CFG &cfg_)
     : cfg(cfg_)
 {
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp
index be10371..5f62cca 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp
@@ -88,11 +88,17 @@ public:
 			return;
 		seen_blocks.insert(block);
 
-		op(block);
-		for (auto b : get_succeeding_edges(block))
-			walk_from(seen_blocks, b, op);
+		if (op(block))
+		{
+			for (auto b : get_succeeding_edges(block))
+				walk_from(seen_blocks, b, op);
+		}
 	}
 
+	uint32_t find_loop_dominator(uint32_t block) const;
+
+	bool node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const;
+
 private:
 	struct VisitOrder
 	{
@@ -125,6 +131,7 @@ private:
 	uint32_t visit_count = 0;
 
 	bool is_back_edge(uint32_t to) const;
+	bool has_visited_forward_edge(uint32_t to) const;
 };
 
 class DominatorBuilder
diff --git a/src/3rdparty/SPIRV-Cross/spirv_common.hpp b/src/3rdparty/SPIRV-Cross/spirv_common.hpp
index 0cf1f56..c1c6fc8 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_common.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_common.hpp
@@ -20,6 +20,7 @@
 #include "spirv.hpp"
 #include "spirv_cross_containers.hpp"
 #include "spirv_cross_error_handling.hpp"
+#include <functional>
 
 // A bit crude, but allows projects which embed SPIRV-Cross statically to
 // effectively hide all the symbols from other projects.
@@ -183,14 +184,14 @@ std::string join(Ts &&... ts)
 	return stream.str();
 }
 
-inline std::string merge(const SmallVector<std::string> &list)
+inline std::string merge(const SmallVector<std::string> &list, const char *between = ", ")
 {
 	StringStream<> stream;
 	for (auto &elem : list)
 	{
 		stream << elem;
 		if (&elem != &list.back())
-			stream << ", ";
+			stream << between;
 	}
 	return stream.str();
 }
@@ -270,20 +271,6 @@ struct Instruction
 	uint32_t length = 0;
 };
 
-// Helper for Variant interface.
-struct IVariant
-{
-	virtual ~IVariant() = default;
-	virtual IVariant *clone(ObjectPoolBase *pool) = 0;
-	uint32_t self = 0;
-};
-
-#define SPIRV_CROSS_DECLARE_CLONE(T)                                \
-	IVariant *clone(ObjectPoolBase *pool) override                  \
-	{                                                               \
-		return static_cast<ObjectPool<T> *>(pool)->allocate(*this); \
-	}
-
 enum Types
 {
 	TypeNone,
@@ -299,9 +286,140 @@ enum Types
 	TypeCombinedImageSampler,
 	TypeAccessChain,
 	TypeUndef,
+	TypeString,
 	TypeCount
 };
 
+template <Types type>
+class TypedID; // Wraps a uint32_t ID and tags it with the Types enumerator given as template argument.
+
+template <>
+class TypedID<TypeNone> // Untyped ID: implicitly convertible to and from every other TypedID<U>.
+{
+public:
+	TypedID() = default;
+	TypedID(uint32_t id_) // Implicit: a raw uint32_t converts to an untyped ID.
+	    : id(id_)
+	{
+	}
+
+	template <Types U>
+	TypedID(const TypedID<U> &other)
+	{
+		*this = other; // Delegates to the templated operator= below.
+	}
+
+	template <Types U>
+	TypedID &operator=(const TypedID<U> &other)
+	{
+		id = uint32_t(other); // Keeps only the raw value; the type tag U is dropped.
+		return *this;
+	}
+
+	// Implicit conversion to u32 is desired here.
+	// As long as we block implicit conversion between TypedID<A> and TypedID<B> we're good.
+	operator uint32_t() const
+	{
+		return id;
+	}
+
+	template <Types U>
+	operator TypedID<U>() const
+	{
+		return TypedID<U>(*this); // Uses the explicit TypedID<U>(const TypedID<TypeNone> &) constructor.
+	}
+
+	bool operator==(const TypedID &other) const
+	{
+		return id == other.id;
+	}
+
+	bool operator!=(const TypedID &other) const
+	{
+		return id != other.id;
+	}
+
+	template <Types type>
+	bool operator==(const TypedID<type> &other) const
+	{
+		return id == uint32_t(other); // Mixed comparison against a typed ID compares the raw values.
+	}
+
+	template <Types type>
+	bool operator!=(const TypedID<type> &other) const
+	{
+		return id != uint32_t(other);
+	}
+
+private:
+	uint32_t id = 0; // Raw ID value; 0 when default-constructed.
+};
+
+template <Types type>
+class TypedID // ID tagged with a specific Types value; the TypeNone specialization is the untyped variant.
+{
+public:
+	TypedID() = default;
+	TypedID(uint32_t id_) // Implicit: a raw uint32_t converts to a typed ID.
+	    : id(id_)
+	{
+	}
+
+	explicit TypedID(const TypedID<TypeNone> &other) // Construction from an untyped ID; copies the raw value.
+	    : id(uint32_t(other))
+	{
+	}
+
+	operator uint32_t() const // Implicit conversion to the raw ID value.
+	{
+		return id;
+	}
+
+	bool operator==(const TypedID &other) const
+	{
+		return id == other.id;
+	}
+
+	bool operator!=(const TypedID &other) const
+	{
+		return id != other.id;
+	}
+
+	bool operator==(const TypedID<TypeNone> &other) const // Mixed comparison against an untyped ID compares raw values.
+	{
+		return id == uint32_t(other);
+	}
+
+	bool operator!=(const TypedID<TypeNone> &other) const
+	{
+		return id != uint32_t(other);
+	}
+
+private:
+	uint32_t id = 0; // Raw ID value; 0 when default-constructed.
+};
+
+using VariableID = TypedID<TypeVariable>; // Convenience aliases for the ID kinds used by the structs in this header.
+using TypeID = TypedID<TypeType>;
+using ConstantID = TypedID<TypeConstant>;
+using FunctionID = TypedID<TypeFunction>;
+using BlockID = TypedID<TypeBlock>;
+using ID = TypedID<TypeNone>; // Untyped ID (TypeNone specialization).
+
+// Helper for Variant interface: polymorphic base of the SPIR* structs in this header (SPIRUndef, SPIRString, ...).
+struct IVariant
+{
+	virtual ~IVariant() = default;
+	virtual IVariant *clone(ObjectPoolBase *pool) = 0; // Implemented per type via SPIRV_CROSS_DECLARE_CLONE.
+	ID self = 0; // NOTE(review): presumably this object's own ID - confirm against where it is assigned.
+};
+// Implements IVariant::clone() for T by calling ObjectPool<T>::allocate(*this) on the pool passed in.
+#define SPIRV_CROSS_DECLARE_CLONE(T)                                \
+	IVariant *clone(ObjectPoolBase *pool) override                  \
+	{                                                               \
+		return static_cast<ObjectPool<T> *>(pool)->allocate(*this); \
+	}
+
 struct SPIRUndef : IVariant
 {
 	enum
@@ -309,15 +427,32 @@ struct SPIRUndef : IVariant
 		type = TypeUndef
 	};
 
-	explicit SPIRUndef(uint32_t basetype_)
+	explicit SPIRUndef(TypeID basetype_)
 	    : basetype(basetype_)
 	{
 	}
-	uint32_t basetype;
+	TypeID basetype;
 
 	SPIRV_CROSS_DECLARE_CLONE(SPIRUndef)
 };
 
+struct SPIRString : IVariant // Variant object that stores a string value (type tag TypeString).
+{
+	enum
+	{
+		type = TypeString
+	};
+
+	explicit SPIRString(std::string str_) // Takes the string by value and moves it into the member.
+	    : str(std::move(str_))
+	{
+	}
+
+	std::string str; // The stored text.
+
+	SPIRV_CROSS_DECLARE_CLONE(SPIRString)
+};
+
 // This type is only used by backends which need to access the combined image and sampler IDs separately after
 // the OpSampledImage opcode.
 struct SPIRCombinedImageSampler : IVariant
@@ -326,15 +461,15 @@ struct SPIRCombinedImageSampler : IVariant
 	{
 		type = TypeCombinedImageSampler
 	};
-	SPIRCombinedImageSampler(uint32_t type_, uint32_t image_, uint32_t sampler_)
+	SPIRCombinedImageSampler(TypeID type_, VariableID image_, VariableID sampler_)
 	    : combined_type(type_)
 	    , image(image_)
 	    , sampler(sampler_)
 	{
 	}
-	uint32_t combined_type;
-	uint32_t image;
-	uint32_t sampler;
+	TypeID combined_type;
+	VariableID image;
+	VariableID sampler;
 
 	SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler)
 };
@@ -346,16 +481,18 @@ struct SPIRConstantOp : IVariant
 		type = TypeConstantOp
 	};
 
-	SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length)
+	SPIRConstantOp(TypeID result_type, spv::Op op, const uint32_t *args, uint32_t length)
 	    : opcode(op)
-	    , arguments(args, args + length)
 	    , basetype(result_type)
 	{
+		arguments.reserve(length);
+		for (uint32_t i = 0; i < length; i++)
+			arguments.push_back(args[i]);
 	}
 
 	spv::Op opcode;
 	SmallVector<uint32_t> arguments;
-	uint32_t basetype;
+	TypeID basetype;
 
 	SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp)
 };
@@ -418,11 +555,11 @@ struct SPIRType : IVariant
 
 	spv::StorageClass storage = spv::StorageClassGeneric;
 
-	SmallVector<uint32_t> member_types;
+	SmallVector<TypeID> member_types;
 
 	struct ImageType
 	{
-		uint32_t type;
+		TypeID type;
 		spv::Dim dim;
 		bool depth;
 		bool arrayed;
@@ -435,11 +572,11 @@ struct SPIRType : IVariant
 	// Structs can be declared multiple times if they are used as part of interface blocks.
 	// We want to detect this so that we only emit the struct definition once.
 	// Since we cannot rely on OpName to be equal, we need to figure out aliases.
-	uint32_t type_alias = 0;
+	TypeID type_alias = 0;
 
 	// Denotes the type which this type is based on.
 	// Allows the backend to traverse how a complex type is built up during access chains.
-	uint32_t parent_type = 0;
+	TypeID parent_type = 0;
 
 	// Used in backends to avoid emitting members with conflicting names.
 	std::unordered_set<std::string> member_name_cache;
@@ -458,6 +595,7 @@ struct SPIRExtension : IVariant
 	{
 		Unsupported,
 		GLSL,
+		SPV_debug_info,
 		SPV_AMD_shader_ballot,
 		SPV_AMD_shader_explicit_vertex_parameter,
 		SPV_AMD_shader_trinary_minmax,
@@ -477,7 +615,7 @@ struct SPIRExtension : IVariant
 // so in order to avoid conflicts, we can't stick them in the ids array.
 struct SPIREntryPoint
 {
-	SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name)
+	SPIREntryPoint(FunctionID self_, spv::ExecutionModel execution_model, const std::string &entry_name)
 	    : self(self_)
 	    , name(entry_name)
 	    , orig_name(entry_name)
@@ -486,10 +624,10 @@ struct SPIREntryPoint
 	}
 	SPIREntryPoint() = default;
 
-	uint32_t self = 0;
+	FunctionID self = 0;
 	std::string name;
 	std::string orig_name;
-	SmallVector<uint32_t> interface_variables;
+	SmallVector<VariableID> interface_variables;
 
 	Bitset flags;
 	struct
@@ -510,7 +648,7 @@ struct SPIRExpression : IVariant
 	};
 
 	// Only created by the backend target to avoid creating tons of temporaries.
-	SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_)
+	SPIRExpression(std::string expr, TypeID expression_type_, bool immutable_)
 	    : expression(move(expr))
 	    , expression_type(expression_type_)
 	    , immutable(immutable_)
@@ -520,14 +658,14 @@ struct SPIRExpression : IVariant
 	// If non-zero, prepend expression with to_expression(base_expression).
 	// Used in amortizing multiple calls to to_expression()
 	// where in certain cases that would quickly force a temporary when not needed.
-	uint32_t base_expression = 0;
+	ID base_expression = 0;
 
 	std::string expression;
-	uint32_t expression_type = 0;
+	TypeID expression_type = 0;
 
 	// If this expression is a forwarded load,
 	// allow us to reference the original variable.
-	uint32_t loaded_from = 0;
+	ID loaded_from = 0;
 
 	// If this expression will never change, we can avoid lots of temporaries
 	// in high level source.
@@ -543,11 +681,11 @@ struct SPIRExpression : IVariant
 	bool access_chain = false;
 
 	// A list of expressions which this expression depends on.
-	SmallVector<uint32_t> expression_dependencies;
+	SmallVector<ID> expression_dependencies;
 
 	// By reading this expression, we implicitly read these expressions as well.
 	// Used by access chain Store and Load since we read multiple expressions in this case.
-	SmallVector<uint32_t> implied_read_expressions;
+	SmallVector<ID> implied_read_expressions;
 
 	SPIRV_CROSS_DECLARE_CLONE(SPIRExpression)
 };
@@ -559,12 +697,12 @@ struct SPIRFunctionPrototype : IVariant
 		type = TypeFunctionPrototype
 	};
 
-	explicit SPIRFunctionPrototype(uint32_t return_type_)
+	explicit SPIRFunctionPrototype(TypeID return_type_)
 	    : return_type(return_type_)
 	{
 	}
 
-	uint32_t return_type;
+	TypeID return_type;
 	SmallVector<uint32_t> parameter_types;
 
 	SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype)
@@ -639,23 +777,23 @@ struct SPIRBlock : IVariant
 	Terminator terminator = Unknown;
 	Merge merge = MergeNone;
 	Hints hint = HintNone;
-	uint32_t next_block = 0;
-	uint32_t merge_block = 0;
-	uint32_t continue_block = 0;
+	BlockID next_block = 0;
+	BlockID merge_block = 0;
+	BlockID continue_block = 0;
 
-	uint32_t return_value = 0; // If 0, return nothing (void).
-	uint32_t condition = 0;
-	uint32_t true_block = 0;
-	uint32_t false_block = 0;
-	uint32_t default_block = 0;
+	ID return_value = 0; // If 0, return nothing (void).
+	ID condition = 0;
+	BlockID true_block = 0;
+	BlockID false_block = 0;
+	BlockID default_block = 0;
 
 	SmallVector<Instruction> ops;
 
 	struct Phi
 	{
-		uint32_t local_variable; // flush local variable ...
-		uint32_t parent; // If we're in from_block and want to branch into this block ...
-		uint32_t function_variable; // to this function-global "phi" variable first.
+		ID local_variable; // flush local variable ...
+		BlockID parent; // If we're in from_block and want to branch into this block ...
+		VariableID function_variable; // to this function-global "phi" variable first.
 	};
 
 	// Before entering this block flush out local variables to magical "phi" variables.
@@ -663,16 +801,16 @@ struct SPIRBlock : IVariant
 
 	// Declare these temporaries before beginning the block.
 	// Used for handling complex continue blocks which have side effects.
-	SmallVector<std::pair<uint32_t, uint32_t>> declare_temporary;
+	SmallVector<std::pair<TypeID, ID>> declare_temporary;
 
 	// Declare these temporaries, but only conditionally if this block turns out to be
 	// a complex loop header.
-	SmallVector<std::pair<uint32_t, uint32_t>> potential_declare_temporary;
+	SmallVector<std::pair<TypeID, ID>> potential_declare_temporary;
 
 	struct Case
 	{
 		uint32_t value;
-		uint32_t block;
+		BlockID block;
 	};
 	SmallVector<Case> cases;
 
@@ -686,23 +824,27 @@ struct SPIRBlock : IVariant
 	// Do we need a ladder variable to defer breaking out of a loop construct after a switch block?
 	bool need_ladder_break = false;
 
+	// If marked, we have explicitly handled Phi from this block, so skip any flushes related to that on a branch.
+	// Used to handle an edge case with switch and case-label fallthrough where fall-through writes to Phi.
+	BlockID ignore_phi_from_block = 0;
+
 	// The dominating block which this block might be within.
 	// Used in continue; blocks to determine if we really need to write continue.
-	uint32_t loop_dominator = 0;
+	BlockID loop_dominator = 0;
 
 	// All access to these variables are dominated by this block,
 	// so before branching anywhere we need to make sure that we declare these variables.
-	SmallVector<uint32_t> dominated_variables;
+	SmallVector<VariableID> dominated_variables;
 
 	// These are variables which should be declared in a for loop header, if we
 	// fail to use a classic for-loop,
 	// we remove these variables, and fall back to regular variables outside the loop.
-	SmallVector<uint32_t> loop_variables;
+	SmallVector<VariableID> loop_variables;
 
 	// Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or
 	// sub-group-like operations.
 	// Make sure that we only use these expressions in the original block.
-	SmallVector<uint32_t> invalidate_expressions;
+	SmallVector<ID> invalidate_expressions;
 
 	SPIRV_CROSS_DECLARE_CLONE(SPIRBlock)
 };
@@ -714,7 +856,7 @@ struct SPIRFunction : IVariant
 		type = TypeFunction
 	};
 
-	SPIRFunction(uint32_t return_type_, uint32_t function_type_)
+	SPIRFunction(TypeID return_type_, TypeID function_type_)
 	    : return_type(return_type_)
 	    , function_type(function_type_)
 	{
@@ -722,8 +864,8 @@ struct SPIRFunction : IVariant
 
 	struct Parameter
 	{
-		uint32_t type;
-		uint32_t id;
+		TypeID type;
+		ID id;
 		uint32_t read_count;
 		uint32_t write_count;
 
@@ -745,33 +887,40 @@ struct SPIRFunction : IVariant
 	// or a global ID.
 	struct CombinedImageSamplerParameter
 	{
-		uint32_t id;
-		uint32_t image_id;
-		uint32_t sampler_id;
+		VariableID id;
+		VariableID image_id;
+		VariableID sampler_id;
 		bool global_image;
 		bool global_sampler;
 		bool depth;
 	};
 
-	uint32_t return_type;
-	uint32_t function_type;
+	TypeID return_type;
+	TypeID function_type;
 	SmallVector<Parameter> arguments;
 
 	// Can be used by backends to add magic arguments.
 	// Currently used by combined image/sampler implementation.
 
 	SmallVector<Parameter> shadow_arguments;
-	SmallVector<uint32_t> local_variables;
-	uint32_t entry_block = 0;
-	SmallVector<uint32_t> blocks;
+	SmallVector<VariableID> local_variables;
+	BlockID entry_block = 0;
+	SmallVector<BlockID> blocks;
 	SmallVector<CombinedImageSamplerParameter> combined_parameters;
 
-	void add_local_variable(uint32_t id)
+	struct EntryLine
+	{
+		uint32_t file_id = 0;
+		uint32_t line_literal = 0;
+	};
+	EntryLine entry_line;
+
+	void add_local_variable(VariableID id)
 	{
 		local_variables.push_back(id);
 	}
 
-	void add_parameter(uint32_t parameter_type, uint32_t id, bool alias_global_variable = false)
+	void add_parameter(TypeID parameter_type, ID id, bool alias_global_variable = false)
 	{
 		// Arguments are read-only until proven otherwise.
 		arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable });
@@ -792,7 +941,7 @@ struct SPIRFunction : IVariant
 	// On function entry, make sure to copy a constant array into thread addr space to work around
 	// the case where we are passing a constant array by value to a function on backends which do not
 	// consider arrays value types.
-	SmallVector<uint32_t> constant_arrays_needed_on_stack;
+	SmallVector<ID> constant_arrays_needed_on_stack;
 
 	bool active = false;
 	bool flush_undeclared = true;
@@ -808,7 +957,7 @@ struct SPIRAccessChain : IVariant
 		type = TypeAccessChain
 	};
 
-	SPIRAccessChain(uint32_t basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_,
+	SPIRAccessChain(TypeID basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_,
 	                int32_t static_index_)
 	    : basetype(basetype_)
 	    , storage(storage_)
@@ -823,20 +972,20 @@ struct SPIRAccessChain : IVariant
 	// which has no usable buffer type ala GLSL SSBOs.
 	// StructuredBuffer is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses.
 
-	uint32_t basetype;
+	TypeID basetype;
 	spv::StorageClass storage;
 	std::string base;
 	std::string dynamic_index;
 	int32_t static_index;
 
-	uint32_t loaded_from = 0;
+	VariableID loaded_from = 0;
 	uint32_t matrix_stride = 0;
 	bool row_major_matrix = false;
 	bool immutable = false;
 
 	// By reading this expression, we implicitly read these expressions as well.
 	// Used by access chain Store and Load since we read multiple expressions in this case.
-	SmallVector<uint32_t> implied_read_expressions;
+	SmallVector<ID> implied_read_expressions;
 
 	SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain)
 };
@@ -849,7 +998,7 @@ struct SPIRVariable : IVariant
 	};
 
 	SPIRVariable() = default;
-	SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0, uint32_t basevariable_ = 0)
+	SPIRVariable(TypeID basetype_, spv::StorageClass storage_, ID initializer_ = 0, VariableID basevariable_ = 0)
 	    : basetype(basetype_)
 	    , storage(storage_)
 	    , initializer(initializer_)
@@ -857,11 +1006,11 @@ struct SPIRVariable : IVariant
 	{
 	}
 
-	uint32_t basetype = 0;
+	TypeID basetype = 0;
 	spv::StorageClass storage = spv::StorageClassGeneric;
 	uint32_t decoration = 0;
-	uint32_t initializer = 0;
-	uint32_t basevariable = 0;
+	ID initializer = 0;
+	VariableID basevariable = 0;
 
 	SmallVector<uint32_t> dereference_chain;
 	bool compat_builtin = false;
@@ -871,10 +1020,10 @@ struct SPIRVariable : IVariant
 	// When we read the variable as an expression, just forward
 	// shadowed_id as the expression.
 	bool statically_assigned = false;
-	uint32_t static_expression = 0;
+	ID static_expression = 0;
 
 	// Temporaries which can remain forwarded as long as this variable is not modified.
-	SmallVector<uint32_t> dependees;
+	SmallVector<ID> dependees;
 	bool forwardable = true;
 
 	bool deferred_declaration = false;
@@ -887,7 +1036,7 @@ struct SPIRVariable : IVariant
 	uint32_t remapped_components = 0;
 
 	// The block which dominates all access to this variable.
-	uint32_t dominator = 0;
+	BlockID dominator = 0;
 	// If true, this variable is a loop variable, when accessing the variable
 	// outside a loop,
 	// we should statically forward it.
@@ -907,7 +1056,8 @@ struct SPIRConstant : IVariant
 		type = TypeConstant
 	};
 
-	union Constant {
+	union Constant
+	{
 		uint32_t u32;
 		int32_t i32;
 		float f32;
@@ -921,15 +1071,12 @@ struct SPIRConstant : IVariant
 	{
 		Constant r[4];
 		// If != 0, this element is a specialization constant, and we should keep track of it as such.
-		uint32_t id[4];
+		ID id[4];
 		uint32_t vecsize = 1;
 
-		// Workaround for MSVC 2013, initializing an array breaks.
 		ConstantVector()
 		{
 			memset(r, 0, sizeof(r));
-			for (unsigned i = 0; i < 4; i++)
-				id[i] = 0;
 		}
 	};
 
@@ -937,15 +1084,8 @@ struct SPIRConstant : IVariant
 	{
 		ConstantVector c[4];
 		// If != 0, this column is a specialization constant, and we should keep track of it as such.
-		uint32_t id[4];
+		ID id[4];
 		uint32_t columns = 1;
-
-		// Workaround for MSVC 2013, initializing an array breaks.
-		ConstantMatrix()
-		{
-			for (unsigned i = 0; i < 4; i++)
-				id[i] = 0;
-		}
 	};
 
 	static inline float f16_to_f32(uint16_t u16_value)
@@ -955,7 +1095,8 @@ struct SPIRConstant : IVariant
 		int e = (u16_value >> 10) & 0x1f;
 		int m = (u16_value >> 0) & 0x3ff;
 
-		union {
+		union
+		{
 			float f32;
 			uint32_t u32;
 		} u;
@@ -1109,16 +1250,18 @@ struct SPIRConstant : IVariant
 
 	SPIRConstant() = default;
 
-	SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
+	SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
 	{
-		subconstants.insert(std::end(subconstants), elements, elements + num_elements);
+		subconstants.reserve(num_elements);
+		for (uint32_t i = 0; i < num_elements; i++)
+			subconstants.push_back(elements[i]);
 		specialization = specialized;
 	}
 
 	// Construct scalar (32-bit).
-	SPIRConstant(uint32_t constant_type_, uint32_t v0, bool specialized)
+	SPIRConstant(TypeID constant_type_, uint32_t v0, bool specialized)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
 	{
@@ -1128,7 +1271,7 @@ struct SPIRConstant : IVariant
 	}
 
 	// Construct scalar (64-bit).
-	SPIRConstant(uint32_t constant_type_, uint64_t v0, bool specialized)
+	SPIRConstant(TypeID constant_type_, uint64_t v0, bool specialized)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
 	{
@@ -1138,7 +1281,7 @@ struct SPIRConstant : IVariant
 	}
 
 	// Construct vectors and matrices.
-	SPIRConstant(uint32_t constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements,
+	SPIRConstant(TypeID constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements,
 	             bool specialized)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
@@ -1170,7 +1313,7 @@ struct SPIRConstant : IVariant
 		}
 	}
 
-	uint32_t constant_type = 0;
+	TypeID constant_type = 0;
 	ConstantMatrix m;
 
 	// If this constant is a specialization constant (i.e. created with OpSpecConstant*).
@@ -1182,7 +1325,7 @@ struct SPIRConstant : IVariant
 	bool is_used_as_lut = false;
 
 	// For composites which are constant arrays, etc.
-	SmallVector<uint32_t> subconstants;
+	SmallVector<ConstantID> subconstants;
 
 	// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
 	// and uses them to initialize the constant. This allows the user
@@ -1317,9 +1460,9 @@ public:
 		return type;
 	}
 
-	uint32_t get_id() const
+	ID get_id() const
 	{
-		return holder ? holder->self : 0;
+		return holder ? holder->self : ID(0);
 	}
 
 	bool empty() const
@@ -1368,12 +1511,57 @@ T &variant_set(Variant &var, P &&... args)
 
 struct AccessChainMeta
 {
-	uint32_t storage_packed_type = 0;
+	uint32_t storage_physical_type = 0;
 	bool need_transpose = false;
 	bool storage_is_packed = false;
 	bool storage_is_invariant = false;
 };
 
+enum ExtendedDecorations
+{
+	// Marks if a buffer block is re-packed, i.e. member declaration might be subject to PhysicalTypeID remapping and padding.
+	SPIRVCrossDecorationBufferBlockRepacked = 0,
+
+	// A type in a buffer block might be declared with a different physical type than the logical type.
+	// If this is not set, PhysicalTypeID == the SPIR-V type as declared.
+	SPIRVCrossDecorationPhysicalTypeID,
+
+	// Marks if the physical type is to be declared with tight packing rules, i.e. packed_floatN on MSL and friends.
+	// If this is set, PhysicalTypeID might also be set. It can be set to same as logical type if all we're doing
+	// is converting float3 to packed_float3 for example.
+	// If this is marked on a struct, it means the struct itself must use only Packed types for all its members.
+	SPIRVCrossDecorationPhysicalTypePacked,
+
+	// The padding in bytes before declaring this struct member.
+	// If used on a struct type, marks the target size of a struct.
+	SPIRVCrossDecorationPaddingTarget,
+
+	SPIRVCrossDecorationInterfaceMemberIndex,
+	SPIRVCrossDecorationInterfaceOrigID,
+	SPIRVCrossDecorationResourceIndexPrimary,
+	// Used for decorations like resource indices for samplers when part of combined image samplers.
+	// A variable might need to hold two resource indices in this case.
+	SPIRVCrossDecorationResourceIndexSecondary,
+	// Used for resource indices for multiplanar images when part of combined image samplers.
+	SPIRVCrossDecorationResourceIndexTertiary,
+	SPIRVCrossDecorationResourceIndexQuaternary,
+
+	// Marks a buffer block for using explicit offsets (GLSL/HLSL).
+	SPIRVCrossDecorationExplicitOffset,
+
+	// Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase().
+	// In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables.
+	SPIRVCrossDecorationBuiltInDispatchBase,
+
+	// Apply to a variable that is a function parameter; marks it as being a "dynamic"
+	// combined image-sampler. In MSL, this is used when a function parameter might hold
+	// either a regular combined image-sampler or one that has an attached sampler
+	// Y'CbCr conversion.
+	SPIRVCrossDecorationDynamicImageSampler,
+
+	SPIRVCrossDecorationCount
+};
+
 struct Meta
 {
 	struct Decoration
@@ -1396,13 +1584,17 @@ struct Meta
 		spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
 		bool builtin = false;
 
-		struct
+		struct Extended
 		{
-			uint32_t packed_type = 0;
-			bool packed = false;
-			uint32_t ib_member_index = ~(0u);
-			uint32_t ib_orig_id = 0;
-			uint32_t argument_buffer_id = ~(0u);
+			Extended()
+			{
+				// MSVC 2013 workaround to init like this.
+				for (auto &v : values)
+					v = 0;
+			}
+
+			Bitset flags;
+			uint32_t values[SPIRVCrossDecorationCount];
 		} extended;
 	};
 
@@ -1510,4 +1702,16 @@ static inline bool opcode_is_sign_invariant(spv::Op opcode)
 }
 } // namespace SPIRV_CROSS_NAMESPACE
 
+namespace std
+{
+template <SPIRV_CROSS_NAMESPACE::Types type>
+struct hash<SPIRV_CROSS_NAMESPACE::TypedID<type>>
+{
+	size_t operator()(const SPIRV_CROSS_NAMESPACE::TypedID<type> &value) const
+	{
+		return std::hash<uint32_t>()(value);
+	}
+};
+} // namespace std
+
 #endif
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp
index 90566c1..25966b3 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp
@@ -321,6 +321,8 @@ string CompilerCPP::compile()
 	backend.explicit_struct_type = true;
 	backend.use_initializer_list = true;
 
+	fixup_type_alias();
+	reorder_type_alias();
 	build_function_control_flow_graphs_and_analyze();
 	update_active_builtins();
 
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp
index 6b66b74..c73ecdf 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp
@@ -17,6 +17,7 @@
 #include "spirv_cross.hpp"
 #include "GLSL.std.450.h"
 #include "spirv_cfg.hpp"
+#include "spirv_common.hpp"
 #include "spirv_parser.hpp"
 #include <algorithm>
 #include <cstring>
@@ -87,6 +88,10 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
 
 bool Compiler::block_is_pure(const SPIRBlock &block)
 {
+	// This is a global side effect of the function.
+	if (block.terminator == SPIRBlock::Kill)
+		return false;
+
 	for (auto &i : block.ops)
 	{
 		auto ops = stream(i);
@@ -155,6 +160,10 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
 
 			// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
 
+		case OpDemoteToHelperInvocationEXT:
+			// This is a global side effect of the function.
+			return false;
+
 		default:
 			break;
 		}
@@ -176,7 +185,7 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const
 		{
 			// If the alias master has been specially packed, we will have emitted a clean variant as well,
 			// so skip the name aliasing here.
-			if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
+			if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
 				return to_name(type.type_alias);
 		}
 	}
@@ -264,6 +273,15 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
 	return var;
 }
 
+StorageClass Compiler::get_backing_variable_storage(uint32_t ptr)
+{
+	auto *var = maybe_get_backing_variable(ptr);
+	if (var)
+		return var->storage;
+	else
+		return expression_type(ptr).storage;
+}
+
 void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
 {
 	auto &e = get<SPIRExpression>(expr);
@@ -568,7 +586,7 @@ ShaderResources Compiler::get_shader_resources() const
 	return get_shader_resources(nullptr);
 }
 
-ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> &active_variables) const
+ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
 {
 	return get_shader_resources(&active_variables);
 }
@@ -708,6 +726,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
 	case OpAtomicAnd:
 	case OpAtomicOr:
 	case OpAtomicXor:
+	case OpArrayLength:
 		// Invalid SPIR-V.
 		if (length < 3)
 			return false;
@@ -724,16 +743,16 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
 	return true;
 }
 
-unordered_set<uint32_t> Compiler::get_active_interface_variables() const
+unordered_set<VariableID> Compiler::get_active_interface_variables() const
 {
 	// Traverse the call graph and find all interface variables which are in use.
-	unordered_set<uint32_t> variables;
+	unordered_set<VariableID> variables;
 	InterfaceVariableAccessHandler handler(*this, variables);
 	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
 
 	// Make sure we preserve output variables which are only initialized, but never accessed by any code.
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
-		if (var.storage == StorageClassOutput && var.initializer != 0)
+		if (var.storage == StorageClassOutput && var.initializer != ID(0))
 			variables.insert(var.self);
 	});
 
@@ -744,16 +763,18 @@ unordered_set<uint32_t> Compiler::get_active_interface_variables() const
 	return variables;
 }
 
-void Compiler::set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables)
+void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
 {
 	active_interface_variables = move(active_variables);
 	check_active_interface_variables = true;
 }
 
-ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *active_variables) const
+ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
 {
 	ShaderResources res;
 
+	bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();
+
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
 		auto &type = this->get<SPIRType>(var.basetype);
 
@@ -771,7 +792,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
 			if (has_decoration(type.self, DecorationBlock))
 			{
 				res.stage_inputs.push_back(
-				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
 			}
 			else
 				res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -787,7 +808,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
 			if (has_decoration(type.self, DecorationBlock))
 			{
 				res.stage_outputs.push_back(
-				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
 			}
 			else
 				res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -796,19 +817,19 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
 		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock))
 		{
 			res.uniform_buffers.push_back(
-			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
 		}
 		// Old way to declare SSBOs.
 		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock))
 		{
 			res.storage_buffers.push_back(
-			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
 		}
 		// Modern way to declare SSBOs.
 		else if (type.storage == StorageClassStorageBuffer)
 		{
 			res.storage_buffers.push_back(
-			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
 		}
 		// Push constant blocks
 		else if (type.storage == StorageClassPushConstant)
@@ -872,65 +893,6 @@ bool Compiler::type_is_block_like(const SPIRType &type) const
 	return false;
 }
 
-void Compiler::fixup_type_alias()
-{
-	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
-	// FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type
-	// alias if the slave type is a block type.
-	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
-		if (type.type_alias && type_is_block_like(type))
-		{
-			// Become the master.
-			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
-				if (other_id == type.self)
-					return;
-
-				if (other_type.type_alias == type.type_alias)
-					other_type.type_alias = type.self;
-			});
-
-			this->get<SPIRType>(type.type_alias).type_alias = self;
-			type.type_alias = 0;
-		}
-	});
-
-	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
-		if (type.type_alias && type_is_block_like(type))
-		{
-			// This is not allowed, drop the type_alias.
-			type.type_alias = 0;
-		}
-	});
-
-	// Reorder declaration of types so that the master of the type alias is always emitted first.
-	// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which
-	// means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
-	auto &type_ids = ir.ids_for_type[TypeType];
-	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
-	{
-		auto &type = get<SPIRType>(*alias_itr);
-		if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
-		{
-			// We will skip declaring this type, so make sure the type_alias type comes before.
-			auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias);
-			assert(master_itr != end(type_ids));
-
-			if (alias_itr < master_itr)
-			{
-				// Must also swap the type order for the constant-type joined array.
-				auto &joined_types = ir.ids_for_constant_or_type;
-				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
-				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
-				assert(alt_alias_itr != end(joined_types));
-				assert(alt_master_itr != end(joined_types));
-
-				swap(*alias_itr, *master_itr);
-				swap(*alt_alias_itr, *alt_master_itr);
-			}
-		}
-	}
-}
-
 void Compiler::parse_fixup()
 {
 	// Figure out specialization constants for work group sizes.
@@ -964,8 +926,6 @@ void Compiler::parse_fixup()
 				aliased_variables.push_back(var.self);
 		}
 	}
-
-	fixup_type_alias();
 }
 
 void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
@@ -1026,17 +986,17 @@ void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
 	update_name_cache(cache, cache, name);
 }
 
-void Compiler::set_name(uint32_t id, const std::string &name)
+void Compiler::set_name(ID id, const std::string &name)
 {
 	ir.set_name(id, name);
 }
 
-const SPIRType &Compiler::get_type(uint32_t id) const
+const SPIRType &Compiler::get_type(TypeID id) const
 {
 	return get<SPIRType>(id);
 }
 
-const SPIRType &Compiler::get_type_from_variable(uint32_t id) const
+const SPIRType &Compiler::get_type_from_variable(VariableID id) const
 {
 	return get<SPIRType>(get<SPIRVariable>(id).basetype);
 }
@@ -1107,23 +1067,23 @@ bool Compiler::is_sampled_image_type(const SPIRType &type)
 	       type.image.dim != DimBuffer;
 }
 
-void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
                                             const std::string &argument)
 {
 	ir.set_member_decoration_string(id, index, decoration, argument);
 }
 
-void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument)
+void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
 {
 	ir.set_member_decoration(id, index, decoration, argument);
 }
 
-void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name)
+void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
 {
 	ir.set_member_name(id, index, name);
 }
 
-const std::string &Compiler::get_member_name(uint32_t id, uint32_t index) const
+const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
 {
 	return ir.get_member_name(id, index);
 }
@@ -1139,7 +1099,7 @@ void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const
 	ir.meta[type_id].members[index].qualified_alias = name;
 }
 
-const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t index) const
+const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
 {
 	auto *m = ir.find_meta(type_id);
 	if (m && index < m->members.size())
@@ -1148,32 +1108,32 @@ const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t ind
 		return ir.get_empty_string();
 }
 
-uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
 {
 	return ir.get_member_decoration(id, index, decoration);
 }
 
-const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const
+const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
 {
 	return ir.get_member_decoration_bitset(id, index);
 }
 
-bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
 {
 	return ir.has_member_decoration(id, index, decoration);
 }
 
-void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration)
+void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
 {
 	ir.unset_member_decoration(id, index, decoration);
 }
 
-void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument)
+void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
 {
 	ir.set_decoration_string(id, decoration, argument);
 }
 
-void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument)
+void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
 {
 	ir.set_decoration(id, decoration, argument);
 }
@@ -1181,28 +1141,8 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum
 void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
 {
 	auto &dec = ir.meta[id].decoration;
-	switch (decoration)
-	{
-	case SPIRVCrossDecorationPacked:
-		dec.extended.packed = true;
-		break;
-
-	case SPIRVCrossDecorationPackedType:
-		dec.extended.packed_type = value;
-		break;
-
-	case SPIRVCrossDecorationInterfaceMemberIndex:
-		dec.extended.ib_member_index = value;
-		break;
-
-	case SPIRVCrossDecorationInterfaceOrigID:
-		dec.extended.ib_orig_id = value;
-		break;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		dec.extended.argument_buffer_id = value;
-		break;
-	}
+	dec.extended.flags.set(decoration);
+	dec.extended.values[decoration] = value;
 }
 
 void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
@@ -1210,28 +1150,23 @@ void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, Ext
 {
 	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
 	auto &dec = ir.meta[type].members[index];
+	dec.extended.flags.set(decoration);
+	dec.extended.values[decoration] = value;
+}
 
+static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
+{
 	switch (decoration)
 	{
-	case SPIRVCrossDecorationPacked:
-		dec.extended.packed = true;
-		break;
-
-	case SPIRVCrossDecorationPackedType:
-		dec.extended.packed_type = value;
-		break;
-
+	case SPIRVCrossDecorationResourceIndexPrimary:
+	case SPIRVCrossDecorationResourceIndexSecondary:
+	case SPIRVCrossDecorationResourceIndexTertiary:
+	case SPIRVCrossDecorationResourceIndexQuaternary:
 	case SPIRVCrossDecorationInterfaceMemberIndex:
-		dec.extended.ib_member_index = value;
-		break;
+		return ~(0u);
 
-	case SPIRVCrossDecorationInterfaceOrigID:
-		dec.extended.ib_orig_id = value;
-		break;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		dec.extended.argument_buffer_id = value;
-		break;
+	default:
+		return 0;
 	}
 }
 
@@ -1242,25 +1177,11 @@ uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations deco
 		return 0;
 
 	auto &dec = m->decoration;
-	switch (decoration)
-	{
-	case SPIRVCrossDecorationPacked:
-		return uint32_t(dec.extended.packed);
 
-	case SPIRVCrossDecorationPackedType:
-		return dec.extended.packed_type;
+	if (!dec.extended.flags.get(decoration))
+		return get_default_extended_decoration(decoration);
 
-	case SPIRVCrossDecorationInterfaceMemberIndex:
-		return dec.extended.ib_member_index;
-
-	case SPIRVCrossDecorationInterfaceOrigID:
-		return dec.extended.ib_orig_id;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		return dec.extended.argument_buffer_id;
-	}
-
-	return 0;
+	return dec.extended.values[decoration];
 }
 
 uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
@@ -1273,25 +1194,9 @@ uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index,
 		return 0;
 
 	auto &dec = m->members[index];
-	switch (decoration)
-	{
-	case SPIRVCrossDecorationPacked:
-		return uint32_t(dec.extended.packed);
-
-	case SPIRVCrossDecorationPackedType:
-		return dec.extended.packed_type;
-
-	case SPIRVCrossDecorationInterfaceMemberIndex:
-		return dec.extended.ib_member_index;
-
-	case SPIRVCrossDecorationInterfaceOrigID:
-		return dec.extended.ib_orig_id;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		return dec.extended.argument_buffer_id;
-	}
-
-	return 0;
+	if (!dec.extended.flags.get(decoration))
+		return get_default_extended_decoration(decoration);
+	return dec.extended.values[decoration];
 }
 
 bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
@@ -1301,25 +1206,7 @@ bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decorati
 		return false;
 
 	auto &dec = m->decoration;
-	switch (decoration)
-	{
-	case SPIRVCrossDecorationPacked:
-		return dec.extended.packed;
-
-	case SPIRVCrossDecorationPackedType:
-		return dec.extended.packed_type != 0;
-
-	case SPIRVCrossDecorationInterfaceMemberIndex:
-		return dec.extended.ib_member_index != uint32_t(-1);
-
-	case SPIRVCrossDecorationInterfaceOrigID:
-		return dec.extended.ib_orig_id != 0;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		return dec.extended.argument_buffer_id != 0;
-	}
-
-	return false;
+	return dec.extended.flags.get(decoration);
 }
 
 bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
@@ -1332,99 +1219,40 @@ bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, Ext
 		return false;
 
 	auto &dec = m->members[index];
-	switch (decoration)
-	{
-	case SPIRVCrossDecorationPacked:
-		return dec.extended.packed;
-
-	case SPIRVCrossDecorationPackedType:
-		return dec.extended.packed_type != 0;
-
-	case SPIRVCrossDecorationInterfaceMemberIndex:
-		return dec.extended.ib_member_index != uint32_t(-1);
-
-	case SPIRVCrossDecorationInterfaceOrigID:
-		return dec.extended.ib_orig_id != 0;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		return dec.extended.argument_buffer_id != uint32_t(-1);
-	}
-
-	return false;
+	return dec.extended.flags.get(decoration);
 }
 
 void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
 {
 	auto &dec = ir.meta[id].decoration;
-	switch (decoration)
-	{
-	case SPIRVCrossDecorationPacked:
-		dec.extended.packed = false;
-		break;
-
-	case SPIRVCrossDecorationPackedType:
-		dec.extended.packed_type = 0;
-		break;
-
-	case SPIRVCrossDecorationInterfaceMemberIndex:
-		dec.extended.ib_member_index = ~(0u);
-		break;
-
-	case SPIRVCrossDecorationInterfaceOrigID:
-		dec.extended.ib_orig_id = 0;
-		break;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		dec.extended.argument_buffer_id = 0;
-		break;
-	}
+	dec.extended.flags.clear(decoration);
+	dec.extended.values[decoration] = 0;
 }
 
 void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
 {
 	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
 	auto &dec = ir.meta[type].members[index];
-
-	switch (decoration)
-	{
-	case SPIRVCrossDecorationPacked:
-		dec.extended.packed = false;
-		break;
-
-	case SPIRVCrossDecorationPackedType:
-		dec.extended.packed_type = 0;
-		break;
-
-	case SPIRVCrossDecorationInterfaceMemberIndex:
-		dec.extended.ib_member_index = ~(0u);
-		break;
-
-	case SPIRVCrossDecorationInterfaceOrigID:
-		dec.extended.ib_orig_id = 0;
-		break;
-
-	case SPIRVCrossDecorationArgumentBufferID:
-		dec.extended.argument_buffer_id = 0;
-		break;
-	}
+	dec.extended.flags.clear(decoration);
+	dec.extended.values[decoration] = 0;
 }
 
-StorageClass Compiler::get_storage_class(uint32_t id) const
+StorageClass Compiler::get_storage_class(VariableID id) const
 {
 	return get<SPIRVariable>(id).storage;
 }
 
-const std::string &Compiler::get_name(uint32_t id) const
+const std::string &Compiler::get_name(ID id) const
 {
 	return ir.get_name(id);
 }
 
-const std::string Compiler::get_fallback_name(uint32_t id) const
+const std::string Compiler::get_fallback_name(ID id) const
 {
 	return join("_", id);
 }
 
-const std::string Compiler::get_block_fallback_name(uint32_t id) const
+const std::string Compiler::get_block_fallback_name(VariableID id) const
 {
 	auto &var = get<SPIRVariable>(id);
 	if (get_name(id).empty())
@@ -1433,37 +1261,37 @@ const std::string Compiler::get_block_fallback_name(uint32_t id) const
 		return get_name(id);
 }
 
-const Bitset &Compiler::get_decoration_bitset(uint32_t id) const
+const Bitset &Compiler::get_decoration_bitset(ID id) const
 {
 	return ir.get_decoration_bitset(id);
 }
 
-bool Compiler::has_decoration(uint32_t id, Decoration decoration) const
+bool Compiler::has_decoration(ID id, Decoration decoration) const
 {
 	return ir.has_decoration(id, decoration);
 }
 
-const string &Compiler::get_decoration_string(uint32_t id, Decoration decoration) const
+const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
 {
 	return ir.get_decoration_string(id, decoration);
 }
 
-const string &Compiler::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const
+const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
 {
 	return ir.get_member_decoration_string(id, index, decoration);
 }
 
-uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const
+uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
 {
 	return ir.get_decoration(id, decoration);
 }
 
-void Compiler::unset_decoration(uint32_t id, Decoration decoration)
+void Compiler::unset_decoration(ID id, Decoration decoration)
 {
 	ir.unset_decoration(id, decoration);
 }
 
-bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const
+bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
 {
 	auto *m = ir.find_meta(id);
 	if (!m)
@@ -1587,38 +1415,6 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method
 		return false;
 }
 
-bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to)
-{
-	auto *start = &from;
-
-	if (start->self == to.self)
-		return true;
-
-	// Break cycles.
-	if (is_continue(start->self))
-		return false;
-
-	// If our select block doesn't merge, we must break or continue in these blocks,
-	// so if continues occur branchless within these blocks, consider them branchless as well.
-	// This is typically used for loop control.
-	if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone &&
-	    (block_is_outside_flow_control_from_block(get<SPIRBlock>(start->true_block), to) ||
-	     block_is_outside_flow_control_from_block(get<SPIRBlock>(start->false_block), to)))
-	{
-		return true;
-	}
-	else if (start->merge_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->merge_block), to))
-	{
-		return true;
-	}
-	else if (start->next_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->next_block), to))
-	{
-		return true;
-	}
-	else
-		return false;
-}
-
 bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
 {
 	if (!execution_is_branchless(from, to))
@@ -1658,6 +1454,11 @@ bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &t
 	}
 }
 
+bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
+{ // True only when `from` has a Direct terminator, declares no merge (MergeNone) and its next_block is `to`.
+	return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
+}
+
 SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
 {
 	// The block was deemed too complex during code emit, pick conservative fallback paths.
@@ -1669,6 +1470,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
 	if (block.merge == SPIRBlock::MergeLoop)
 		return SPIRBlock::WhileLoop;
 
+	if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
+	{
+		// Continue block is never reached from CFG.
+		return SPIRBlock::ComplexLoop;
+	}
+
 	auto &dominator = get<SPIRBlock>(block.loop_dominator);
 
 	if (execution_is_noop(block, dominator))
@@ -1681,6 +1488,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
 		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
 		const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block);
 
+		// If we need to flush Phi in this block, we cannot have a DoWhile loop.
+		bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block);
+		bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block);
+		if (flush_phi_to_false || flush_phi_to_true)
+			return SPIRBlock::ComplexLoop;
+
 		bool positive_do_while = block.true_block == dominator.self &&
 		                         (block.false_block == dominator.merge_block ||
 		                          (false_block && merge_block && execution_is_noop(*false_block, *merge_block)));
@@ -1702,6 +1515,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
 bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
 {
 	handler.set_current_block(block);
+	handler.rearm_current_block(block);
 
 	// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
 	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
@@ -1725,6 +1539,8 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand
 					return false;
 				if (!handler.end_function_scope(ops, i.length))
 					return false;
+
+				handler.rearm_current_block(block);
 			}
 		}
 	}
@@ -1921,7 +1737,7 @@ bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint
 	return true;
 }
 
-SmallVector<BufferRange> Compiler::get_active_buffer_ranges(uint32_t id) const
+SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
 {
 	SmallVector<BufferRange> ranges;
 	BufferAccessHandler handler(*this, ranges, id);
@@ -2014,19 +1830,19 @@ uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationCo
 	{
 		auto &c = get<SPIRConstant>(execution.workgroup_size.constant);
 
-		if (c.m.c[0].id[0] != 0)
+		if (c.m.c[0].id[0] != ID(0))
 		{
 			x.id = c.m.c[0].id[0];
 			x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId);
 		}
 
-		if (c.m.c[0].id[1] != 0)
+		if (c.m.c[0].id[1] != ID(0))
 		{
 			y.id = c.m.c[0].id[1];
 			y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId);
 		}
 
-		if (c.m.c[0].id[2] != 0)
+		if (c.m.c[0].id[2] != ID(0))
 		{
 			z.id = c.m.c[0].id[2];
 			z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId);
@@ -2081,36 +1897,36 @@ bool Compiler::is_tessellation_shader() const
 	return is_tessellation_shader(get_execution_model());
 }
 
-void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable)
+void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
 {
 	get<SPIRVariable>(id).remapped_variable = remap_enable;
 }
 
-bool Compiler::get_remapped_variable_state(uint32_t id) const
+bool Compiler::get_remapped_variable_state(VariableID id) const
 {
 	return get<SPIRVariable>(id).remapped_variable;
 }
 
-void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components)
+void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
 {
 	get<SPIRVariable>(id).remapped_components = components;
 }
 
-uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const
+uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
 {
 	return get<SPIRVariable>(id).remapped_components;
 }
 
 void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
 {
-	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source);
+	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
 	if (itr == end(e.implied_read_expressions))
 		e.implied_read_expressions.push_back(source);
 }
 
 void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
 {
-	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source);
+	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
 	if (itr == end(e.implied_read_expressions))
 		e.implied_read_expressions.push_back(source);
 }
@@ -2251,7 +2067,7 @@ bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
 		return true;
 
 	auto &execution = get_entry_point();
-	return find(begin(execution.interface_variables), end(execution.interface_variables), id) !=
+	return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) !=
 	       end(execution.interface_variables);
 }
 
@@ -2331,8 +2147,8 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a
 	{
 		for (auto &param : params)
 		{
-			uint32_t image_id = param.global_image ? param.image_id : args[param.image_id];
-			uint32_t sampler_id = param.global_sampler ? param.sampler_id : args[param.sampler_id];
+			VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
+			VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);
 
 			auto *i = compiler.maybe_get_backing_variable(image_id);
 			auto *s = compiler.maybe_get_backing_variable(sampler_id);
@@ -2341,15 +2157,17 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a
 			if (s)
 				sampler_id = s->self;
 
-			register_combined_image_sampler(caller, image_id, sampler_id, param.depth);
+			register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth);
 		}
 	}
 
 	return true;
 }
 
-void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, uint32_t image_id,
-                                                                            uint32_t sampler_id, bool depth)
+void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
+                                                                            VariableID combined_module_id,
+                                                                            VariableID image_id, VariableID sampler_id,
+                                                                            bool depth)
 {
 	// We now have a texture ID and a sampler ID which will either be found as a global
 	// or a parameter in our own function. If both are global, they will not need a parameter,
@@ -2409,12 +2227,15 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR
 		// Build new variable.
 		compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0);
 
-		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
-		auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags;
-		auto &old_flags = compiler.ir.meta[sampler_id].decoration.decoration_flags;
-		new_flags.reset();
-		if (old_flags.get(DecorationRelaxedPrecision))
-			new_flags.set(DecorationRelaxedPrecision);
+		// Inherit RelaxedPrecision.
+		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
+		bool relaxed_precision =
+		    compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) ||
+		    compiler.has_decoration(image_id, DecorationRelaxedPrecision) ||
+		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));
+
+		if (relaxed_precision)
+			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);
 
 		param.id = combined_id;
 
@@ -2621,8 +2442,10 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 			if (sampler)
 				sampler_id = sampler->self;
 
+			uint32_t combined_id = args[1];
+
 			auto &combined_type = compiler.get<SPIRType>(args[0]);
-			register_combined_image_sampler(callee, image_id, sampler_id, combined_type.image.depth);
+			register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth);
 		}
 	}
 
@@ -2630,8 +2453,8 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 	// This information is statically known from the current place in the call stack.
 	// Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
 	// which backing variable the image/sample came from.
-	uint32_t image_id = remap_parameter(args[2]);
-	uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);
+	VariableID image_id = remap_parameter(args[2]);
+	VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);
 
 	auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
 	                   [image_id, sampler_id](const CombinedImageSampler &combined) {
@@ -2641,6 +2464,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 	if (itr == end(compiler.combined_image_samplers))
 	{
 		uint32_t sampled_type;
+		uint32_t combined_module_id;
 		if (is_fetch)
 		{
 			// Have to invent the sampled image type.
@@ -2650,10 +2474,12 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 			type.self = sampled_type;
 			type.basetype = SPIRType::SampledImage;
 			type.image.depth = false;
+			combined_module_id = 0;
 		}
 		else
 		{
 			sampled_type = args[0];
+			combined_module_id = args[1];
 		}
 
 		auto id = compiler.ir.increase_bound_by(2);
@@ -2673,12 +2499,14 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 		compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0);
 
 		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
-		auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags;
-		// Fetch inherits precision from the image, not sampler (there is no sampler).
-		auto &old_flags = compiler.ir.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags;
-		new_flags.reset();
-		if (old_flags.get(DecorationRelaxedPrecision))
-			new_flags.set(DecorationRelaxedPrecision);
+		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
+		bool relaxed_precision =
+		    (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) ||
+		    (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) ||
+		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));
+
+		if (relaxed_precision)
+			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);
 
 		// Propagate the array type for the original image as well.
 		auto *var = compiler.maybe_get_backing_variable(image_id);
@@ -2695,7 +2523,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 	return true;
 }
 
-uint32_t Compiler::build_dummy_sampler_for_combined_images()
+VariableID Compiler::build_dummy_sampler_for_combined_images()
 {
 	DummySamplerForCombinedImageHandler handler(*this);
 	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
@@ -2749,12 +2577,12 @@ SmallVector<SpecializationConstant> Compiler::get_specialization_constants() con
 	return spec_consts;
 }
 
-SPIRConstant &Compiler::get_constant(uint32_t id)
+SPIRConstant &Compiler::get_constant(ConstantID id)
 {
 	return get<SPIRConstant>(id);
 }
 
-const SPIRConstant &Compiler::get_constant(uint32_t id) const
+const SPIRConstant &Compiler::get_constant(ConstantID id) const
 {
 	return get<SPIRConstant>(id);
 }
@@ -2938,7 +2766,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 		if (length < 2)
 			return false;
 
-		uint32_t ptr = args[0];
+		ID ptr = args[0];
 		auto *var = compiler.maybe_get_backing_variable(ptr);
 
 		// If we store through an access chain, we have a partial write.
@@ -2983,7 +2811,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 		// The result of an access chain is a fixed expression and is not really considered a temporary.
 		auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
 		auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
-		e.loaded_from = backing_variable ? backing_variable->self : 0;
+		e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);
 
 		// Other backends might use SPIRAccessChain for this later.
 		compiler.ir.ids[args[1]].set_allow_type_rewrite();
@@ -2996,8 +2824,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 		if (length < 2)
 			return false;
 
-		uint32_t lhs = args[0];
-		uint32_t rhs = args[1];
+		ID lhs = args[0];
+		ID rhs = args[1];
 		auto *var = compiler.maybe_get_backing_variable(lhs);
 
 		// If we store through an access chain, we have a partial write.
@@ -3096,6 +2924,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 	}
 
 	case OpArrayLength:
+	case OpLine:
+	case OpNoLine:
 		// Uses literals, but cannot be a phi variable or temporary, so ignore.
 		break;
 
@@ -3334,11 +3164,34 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
 
 	unordered_map<uint32_t, uint32_t> potential_loop_variables;
 
+	// Find the loop dominator block for each block.
+	for (auto &block_id : entry.blocks)
+	{
+		auto &block = get<SPIRBlock>(block_id);
+
+		auto itr = ir.continue_block_to_loop_header.find(block_id);
+		if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id)
+		{
+			// Continue block might be unreachable in the CFG, but we still like to know the loop dominator.
+			// Edge case is when continue block is also the loop header, don't set the dominator in this case.
+			block.loop_dominator = itr->second;
+		}
+		else
+		{
+			uint32_t loop_dominator = cfg.find_loop_dominator(block_id);
+			if (loop_dominator != block_id)
+				block.loop_dominator = loop_dominator;
+			else
+				block.loop_dominator = SPIRBlock::NoDominator;
+		}
+	}
+
 	// For each variable which is statically accessed.
 	for (auto &var : handler.accessed_variables_to_block)
 	{
 		// Only deal with variables which are considered local variables in this function.
-		if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables))
+		if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) ==
+		    end(entry.local_variables))
 			continue;
 
 		DominatorBuilder builder(cfg);
@@ -3379,7 +3232,35 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
 		builder.lift_continue_block_dominator();
 
 		// Add it to a per-block list of variables.
-		uint32_t dominating_block = builder.get_dominator();
+		BlockID dominating_block = builder.get_dominator();
+
+		// For variables whose dominating block is inside a loop, there is a risk that these variables
+		// actually need to be preserved across loop iterations. We can express this by adding
+		// a "read" access to the loop header.
+		// In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
+		// Should that fail, we look for the outermost loop header and tack on an access there.
+		// Phi nodes cannot have this problem.
+		if (dominating_block)
+		{
+			auto &variable = get<SPIRVariable>(var.first);
+			if (!variable.phi_variable)
+			{
+				auto *block = &get<SPIRBlock>(dominating_block);
+				bool preserve = may_read_undefined_variable_in_block(*block, var.first);
+				if (preserve)
+				{
+					// Find the outermost loop scope.
+					while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
+						block = &get<SPIRBlock>(block->loop_dominator);
+
+					if (block->self != dominating_block)
+					{
+						builder.add_block(block->self);
+						dominating_block = builder.get_dominator();
+					}
+				}
+			}
+		}
 
 		// If all blocks here are dead code, this will be 0, so the variable in question
 		// will be completely eliminated.
@@ -3416,10 +3297,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
 		{
 			builder.add_block(block);
 
-			// If a temporary is used in more than one block, we might have to lift continue block
-			// access up to loop header like we did for variables.
 			if (blocks.size() != 1 && is_continue(block))
 			{
+				// The risk here is that inner loop can dominate the continue block.
+				// Any temporary we access in the continue block must be declared before the loop.
+				// This is moot for complex loops however.
 				auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
 				assert(loop_header_block.merge == SPIRBlock::MergeLoop);
 
@@ -3427,14 +3309,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
 				if (!loop_header_block.complex_continue)
 					builder.add_block(loop_header_block.self);
 			}
-			else if (blocks.size() != 1 && is_single_block_loop(block))
-			{
-				// Awkward case, because the loop header is also the continue block.
-				force_temporary = true;
-			}
 		}
 
 		uint32_t dominating_block = builder.get_dominator();
+
+		if (blocks.size() != 1 && is_single_block_loop(dominating_block))
+		{
+			// Awkward case, because the loop header is also the continue block,
+			// so hoisting to loop header does not help.
+			force_temporary = true;
+		}
+
 		if (dominating_block)
 		{
 			// If we touch a variable in the dominating block, this is the expected setup.
@@ -3485,17 +3370,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
 	{
 		auto &var = get<SPIRVariable>(loop_variable.first);
 		auto dominator = var.dominator;
-		auto block = loop_variable.second;
+		BlockID block = loop_variable.second;
 
 		// The variable was accessed in multiple continue blocks, ignore.
-		if (block == ~(0u) || block == 0)
+		if (block == BlockID(~(0u)) || block == BlockID(0))
 			continue;
 
 		// Dead code.
-		if (dominator == 0)
+		if (dominator == ID(0))
 			continue;
 
-		uint32_t header = 0;
+		BlockID header = 0;
 
 		// Find the loop header for this block if we are a continue block.
 		{
@@ -3554,10 +3439,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
 		// merge can occur. Walk the CFG to see if we find anything.
 
 		seen_blocks.clear();
-		cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) {
+		cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool {
 			// We found a block which accesses the variable outside the loop.
 			if (blocks.find(walk_block) != end(blocks))
 				static_loop_init = false;
+			return true;
 		});
 
 		if (!static_loop_init)
@@ -3572,7 +3458,80 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
 	}
 }
 
-Bitset Compiler::get_buffer_block_flags(uint32_t id) const
+bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
+{ // Scans block.ops in order and returns at the first opcode that settles whether `var` may be read before written.
+	for (auto &op : block.ops)
+	{
+		auto *ops = stream(op);
+		switch (op.op)
+		{
+		case OpStore:
+		case OpCopyMemory:
+			if (ops[0] == var) // The first access found targets `var` itself, so report "no undefined read".
+				return false;
+			break; // NOTE(review): an OpCopyMemory whose source (ops[1]) is `var` is not counted as a read - confirm intended.
+
+		case OpAccessChain:
+		case OpInBoundsAccessChain:
+		case OpPtrAccessChain:
+			// Access chains are generally used to partially read and write. It's too hard to analyze
+			// if all constituents are written fully before continuing, so just assume it's preserved.
+			// This is the same as the parameter preservation analysis.
+			if (ops[2] == var)
+				return true;
+			break;
+
+		case OpSelect:
+			// Variable pointers.
+			// We might read before writing.
+			if (ops[3] == var || ops[4] == var)
+				return true;
+			break;
+
+		case OpPhi:
+		{
+			// Variable pointers.
+			// We might read before writing.
+			if (op.length < 2) // Guards the unsigned subtraction below against wrap-around.
+				break;
+
+			uint32_t count = op.length - 2;
+			for (uint32_t i = 0; i < count; i += 2) // Step of two: compares ops[2], ops[4], ... and skips the words between.
+				if (ops[i + 2] == var)
+					return true;
+			break;
+		}
+
+		case OpCopyObject:
+		case OpLoad:
+			if (ops[2] == var)
+				return true;
+			break;
+
+		case OpFunctionCall:
+		{
+			if (op.length < 3) // Guards the unsigned subtraction below against wrap-around.
+				break;
+
+			// May read before writing.
+			uint32_t count = op.length - 3;
+			for (uint32_t i = 0; i < count; i++) // Compares every word from ops[3] onward.
+				if (ops[i + 3] == var)
+					return true;
+			break;
+		}
+
+		default: // Any other opcode is ignored and the scan moves on to the next instruction.
+			break;
+		}
+	}
+
+	// Not accessed somehow, at least not in a usual fashion.
+	// It's likely accessed in a branch, so assume we must preserve.
+	return true;
+}
+
+Bitset Compiler::get_buffer_block_flags(VariableID id) const
 {
 	return ir.get_buffer_block_flags(get<SPIRVariable>(id));
 }
@@ -3848,6 +3807,20 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin
 	return true;
 }
 
+const CFG &Compiler::get_cfg_for_current_function() const
+{ // Returns the CFG looked up for current_function->self via get_cfg_for_function().
+	assert(current_function); // current_function is dereferenced on the next line; this assert is the only check.
+	return get_cfg_for_function(current_function->self);
+}
+
+const CFG &Compiler::get_cfg_for_function(uint32_t id) const
+{ // Looks up `id` in function_cfgs and returns a reference to the CFG stored for it.
+	auto cfg_itr = function_cfgs.find(id);
+	assert(cfg_itr != end(function_cfgs)); // A missing entry is caught only by this assert; there is no other fallback.
+	assert(cfg_itr->second); // The mapped value is dereferenced below, so it must be set.
+	return *cfg_itr->second;
+}
+
 void Compiler::build_function_control_flow_graphs_and_analyze()
 {
 	CFGBuilder handler(*this);
@@ -3997,13 +3970,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
 	return true;
 }
 
-bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const
+bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
 {
 	auto *m = ir.find_meta(id);
 	return m && m->hlsl_is_magic_counter_buffer;
 }
 
-bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const
+bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
 {
 	auto *m = ir.find_meta(id);
 
@@ -4068,20 +4041,69 @@ const SmallVector<std::string> &Compiler::get_declared_extensions() const
 	return ir.declared_extensions;
 }
 
-std::string Compiler::get_remapped_declared_block_name(uint32_t id) const
+std::string Compiler::get_remapped_declared_block_name(VariableID id) const
+{
+	return get_remapped_declared_block_name(id, false);
+}
+
+std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
 {
 	auto itr = declared_block_names.find(id);
 	if (itr != end(declared_block_names))
+	{
 		return itr->second;
+	}
 	else
 	{
 		auto &var = get<SPIRVariable>(id);
-		auto &type = get<SPIRType>(var.basetype);
 
-		auto *type_meta = ir.find_meta(type.self);
-		auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
-		return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
+		if (fallback_prefer_instance_name)
+		{
+			return to_name(var.self);
+		}
+		else
+		{
+			auto &type = get<SPIRType>(var.basetype);
+			auto *type_meta = ir.find_meta(type.self);
+			auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
+			return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
+		}
+	}
+}
+
+bool Compiler::reflection_ssbo_instance_name_is_significant() const
+{ // Returns ir.source.hlsl when source info is known; otherwise true only if two SSBO variables share a type.self.
+	if (ir.source.known)
+	{
+		// UAVs from HLSL source tend to be declared in a way where the type is reused
+		// but the instance name is significant, and that's the name we should report.
+		// For GLSL, SSBOs each have their own block type as that's how GLSL is written.
+		return ir.source.hlsl;
 	}
+
+	unordered_set<uint32_t> ssbo_type_ids; // type.self values of the SSBO variables visited so far.
+	bool aliased_ssbo_types = false; // Set once a type.self is encountered a second time.
+
+	// If we don't have any OpSource information, we need to perform some shaky heuristics.
+	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
+		auto &type = this->get<SPIRType>(var.basetype);
+		if (!type.pointer || var.storage == StorageClassFunction) // Skip non-pointer types and Function-storage variables.
+			return;
+
+		bool ssbo = var.storage == StorageClassStorageBuffer ||
+		            (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));
+
+		if (ssbo)
+		{
+			if (ssbo_type_ids.count(type.self)) // An earlier SSBO variable already recorded this type.self.
+				aliased_ssbo_types = true;
+			else
+				ssbo_type_ids.insert(type.self);
+		}
+	});
+
+	// If the block name is aliased, assume we have HLSL-style UAV declarations.
+	return aliased_ssbo_types;
 }
 
 bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args,
@@ -4110,6 +4132,8 @@ bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &resul
 	case OpCommitWritePipe:
 	case OpGroupCommitReadPipe:
 	case OpGroupCommitWritePipe:
+	case OpLine:
+	case OpNoLine:
 		return false;
 
 	default:
@@ -4242,6 +4266,316 @@ void Compiler::analyze_non_block_pointer_types()
 	sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
 }
 
+bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
+{
+	// Only the interlock begin/end instructions matter to this pass; every other opcode is accepted as-is.
+	if (op != OpBeginInvocationInterlockEXT && op != OpEndInvocationInterlockEXT)
+		return true;
+
+	const uint32_t enclosing_function = call_stack.back();
+	if (interlock_function_id != 0 && interlock_function_id != enclosing_function)
+	{
+		// An interlock instruction was already seen in a different function. Most complex case, we have
+		// no sensible way of dealing with this other than taking the 100% conservative approach, exit early.
+		split_function_case = true;
+		return false;
+	}
+
+	interlock_function_id = enclosing_function;
+
+	// If this call is performed inside control flow we have a problem, so ask the function's CFG
+	// about the path from the function's entry block to the block currently being traversed.
+	auto &function_cfg = compiler.get_cfg_for_function(interlock_function_id);
+	uint32_t entry_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
+	if (!function_cfg.node_terminates_control_flow_in_sub_graph(entry_block_id, current_block_id))
+		control_flow_interlock = true;
+	return true;
+}
+
+void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
+{
+	current_block_id = block.self; // Remember the block being traversed; handle() uses it for its CFG query.
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+	const bool has_function_id = length >= 3; // args[2] is the function being entered.
+	if (has_function_id)
+		call_stack.push_back(args[2]);
+	return has_function_id;
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+	call_stack.pop_back(); // Drop the entry pushed by the matching begin_function_scope().
+	return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+	// The function being entered is read from args[2], so at least three operands are required.
+	if (length < 3)
+		return false;
+
+	const uint32_t entered_function = args[2];
+	call_stack_is_interlocked = call_stack_is_interlocked || entered_function == interlock_function_id;
+	call_stack.push_back(entered_function);
+	return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+	// Leaving the function that contains the interlock ends the interlocked part of the call stack.
+	const uint32_t leaving_function = call_stack.back();
+	call_stack.pop_back();
+	if (leaving_function == interlock_function_id)
+		call_stack_is_interlocked = false;
+	return true;
+}
+
+void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
+{
+	// Record the resource when any of the three analysis modes considers this access interlocked.
+	const bool inside_critical_section = use_critical_section && in_crit_sec;
+	const bool inside_interlocked_call = control_flow_interlock && call_stack_is_interlocked;
+	if (split_function_case || inside_critical_section || inside_interlocked_call)
+		compiler.interlocked_resources.insert(id);
+}
+
+bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
+{
+	// Critical section tracking only applies in the simple case, i.e. when use_critical_section is set.
+	if (use_critical_section)
+	{
+		if (opcode == OpBeginInvocationInterlockEXT)
+		{
+			in_crit_sec = true; // Accesses seen from here on are recorded by access_potential_resource().
+			return true;
+		}
+
+		if (opcode == OpEndInvocationInterlockEXT)
+		{
+			// End critical section--nothing more to do.
+			return false;
+		}
+	}
+
+	// We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
+	switch (opcode)
+	{
+	case OpLoad:
+	{
+		if (length < 3) // Result type, result ID and pointer operands are read below.
+			return false;
+
+		uint32_t ptr = args[2];
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+
+		// We're only concerned with buffer and image memory here.
+		if (!var)
+			break;
+
+		switch (var->storage)
+		{
+		default:
+			break;
+
+		case StorageClassUniformConstant:
+		{
+			uint32_t result_type = args[0];
+			uint32_t id = args[1];
+			compiler.set<SPIRExpression>(id, "", result_type, true);
+			compiler.register_read(id, ptr, true);
+			break;
+		}
+
+		case StorageClassUniform:
+			// Must have BufferBlock; we only care about SSBOs.
+			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+				break;
+			// fallthrough
+		case StorageClassStorageBuffer:
+			access_potential_resource(var->self);
+			break;
+		}
+		break;
+	}
+
+	case OpInBoundsAccessChain:
+	case OpAccessChain:
+	case OpPtrAccessChain:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t result_type = args[0];
+
+		auto &type = compiler.get<SPIRType>(result_type);
+		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
+		    type.storage == StorageClassStorageBuffer)
+		{
+			uint32_t id = args[1];
+			uint32_t ptr = args[2];
+			compiler.set<SPIRExpression>(id, "", result_type, true);
+			compiler.register_read(id, ptr, true);
+			compiler.ir.ids[id].set_allow_type_rewrite();
+		}
+		break;
+	}
+
+	case OpImageTexelPointer:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t result_type = args[0];
+		uint32_t id = args[1];
+		uint32_t ptr = args[2];
+		auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var)
+			e.loaded_from = var->self; // Link the texel pointer back to its backing variable.
+		break;
+	}
+
+	case OpStore:
+	case OpImageWrite:
+	case OpAtomicStore:
+	{
+		if (length < 1) // Only args[0] is read in this case.
+			return false;
+
+		uint32_t ptr = args[0];
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+		            var->storage == StorageClassStorageBuffer))
+		{
+			access_potential_resource(var->self);
+		}
+
+		break;
+	}
+
+	case OpCopyMemory:
+	{
+		if (length < 2)
+			return false;
+
+		uint32_t dst = args[0];
+		uint32_t src = args[1];
+		auto *dst_var = compiler.maybe_get_backing_variable(dst);
+		auto *src_var = compiler.maybe_get_backing_variable(src);
+
+		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
+			access_potential_resource(dst_var->self);
+
+		if (src_var)
+		{
+			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
+				break; // Source is neither Uniform nor StorageBuffer storage.
+
+			if (src_var->storage == StorageClassUniform &&
+			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
+			{
+				break; // Uniform source without BufferBlock is not an SSBO.
+			}
+
+			access_potential_resource(src_var->self);
+		}
+
+		break;
+	}
+
+	case OpImageRead:
+	case OpAtomicLoad:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t ptr = args[2];
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+
+		// We're only concerned with buffer and image memory here.
+		if (!var)
+			break;
+
+		switch (var->storage)
+		{
+		default:
+			break;
+
+		case StorageClassUniform:
+			// Must have BufferBlock; we only care about SSBOs.
+			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+				break;
+			// fallthrough
+		case StorageClassUniformConstant:
+		case StorageClassStorageBuffer:
+			access_potential_resource(var->self);
+			break;
+		}
+		break;
+	}
+
+	case OpAtomicExchange:
+	case OpAtomicCompareExchange:
+	case OpAtomicIIncrement:
+	case OpAtomicIDecrement:
+	case OpAtomicIAdd:
+	case OpAtomicISub:
+	case OpAtomicSMin:
+	case OpAtomicUMin:
+	case OpAtomicSMax:
+	case OpAtomicUMax:
+	case OpAtomicAnd:
+	case OpAtomicOr:
+	case OpAtomicXor:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t ptr = args[2];
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+		            var->storage == StorageClassStorageBuffer))
+		{
+			access_potential_resource(var->self);
+		}
+
+		break;
+	}
+
+	default:
+		break;
+	}
+
+	return true;
+}
+
+void Compiler::analyze_interlocked_resource_usage()
+{
+	// Nothing to do unless this is a fragment shader that declares one of the interlock execution modes.
+	if (get_execution_model() != ExecutionModelFragment)
+		return;
+	if (!get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) &&
+	    !get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) &&
+	    !get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) &&
+	    !get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+		return;
+	// Pass 1: find the function holding the interlock instructions and flag the complex cases.
+	InterlockedResourceAccessPrepassHandler prepass(*this, ir.default_entry_point);
+	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass);
+
+	// Pass 2: collect the accessed resources, configured with what the prepass found.
+	InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
+	handler.interlock_function_id = prepass.interlock_function_id;
+	handler.split_function_case = prepass.split_function_case;
+	handler.control_flow_interlock = prepass.control_flow_interlock;
+	handler.use_critical_section = !(handler.split_function_case || handler.control_flow_interlock);
+	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
+	// For GLSL. If we hit any of these cases, we have to fall back to conservative approach.
+	interlocked_is_complex = !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
+}
+
 bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
 {
 	if (!type.pointer)
@@ -4250,3 +4584,12 @@ bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
 	// If parent type has same pointer depth, we must have an array of pointers.
 	return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth;
 }
+
+// True if block "to" has a phi variable whose parent block is "from".
+bool Compiler::flush_phi_required(BlockID from, BlockID to) const
+{
+	bool required = false;
+	for (auto &phi : get<SPIRBlock>(to).phi_variables)
+		required = required || phi.parent == from;
+	return required;
+}
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp
index 4129e81..7385a6c 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp
@@ -27,18 +27,18 @@ struct Resource
 {
 	// Resources are identified with their SPIR-V ID.
 	// This is the ID of the OpVariable.
-	uint32_t id;
+	ID id;
 
 	// The type ID of the variable which includes arrays and all type modifications.
 	// This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general
 	// since these modifications typically happen on the base_type_id.
-	uint32_t type_id;
+	TypeID type_id;
 
 	// The base type of the declared resource.
 	// This type is the base type which ignores pointers and arrays of the type_id.
 	// This is mostly useful to parse decorations of the underlying type.
 	// base_type_id can also be obtained with get_type(get_type(type_id).self).
-	uint32_t base_type_id;
+	TypeID base_type_id;
 
 	// The declared name (OpName) of the resource.
 	// For Buffer blocks, the name actually reflects the externally
@@ -77,17 +77,17 @@ struct ShaderResources
 struct CombinedImageSampler
 {
 	// The ID of the sampler2D variable.
-	uint32_t combined_id;
+	VariableID combined_id;
 	// The ID of the texture2D variable.
-	uint32_t image_id;
+	VariableID image_id;
 	// The ID of the sampler variable.
-	uint32_t sampler_id;
+	VariableID sampler_id;
 };
 
 struct SpecializationConstant
 {
 	// The ID of the specialization constant.
-	uint32_t id;
+	ConstantID id;
 	// The constant ID of the constant, used in Vulkan during pipeline creation.
 	uint32_t constant_id;
 };
@@ -117,15 +117,6 @@ struct EntryPoint
 	spv::ExecutionModel execution_model;
 };
 
-enum ExtendedDecorations
-{
-	SPIRVCrossDecorationPacked,
-	SPIRVCrossDecorationPackedType,
-	SPIRVCrossDecorationInterfaceMemberIndex,
-	SPIRVCrossDecorationInterfaceOrigID,
-	SPIRVCrossDecorationArgumentBufferID
-};
-
 class Compiler
 {
 public:
@@ -151,81 +142,81 @@ public:
 	virtual std::string compile();
 
 	// Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned.
-	const std::string &get_name(uint32_t id) const;
+	const std::string &get_name(ID id) const;
 
 	// Applies a decoration to an ID. Effectively injects OpDecorate.
-	void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0);
-	void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument);
+	void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0);
+	void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument);
 
 	// Overrides the identifier OpName of an ID.
 	// Identifiers beginning with underscores or identifiers which contain double underscores
 	// are reserved by the implementation.
-	void set_name(uint32_t id, const std::string &name);
+	void set_name(ID id, const std::string &name);
 
 	// Gets a bitmask for the decorations which are applied to ID.
 	// I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar)
-	const Bitset &get_decoration_bitset(uint32_t id) const;
+	const Bitset &get_decoration_bitset(ID id) const;
 
 	// Returns whether the decoration has been applied to the ID.
-	bool has_decoration(uint32_t id, spv::Decoration decoration) const;
+	bool has_decoration(ID id, spv::Decoration decoration) const;
 
 	// Gets the value for decorations which take arguments.
 	// If the decoration is a boolean (i.e. spv::DecorationNonWritable),
 	// 1 will be returned.
 	// If decoration doesn't exist or decoration is not recognized,
 	// 0 will be returned.
-	uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const;
-	const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const;
+	uint32_t get_decoration(ID id, spv::Decoration decoration) const;
+	const std::string &get_decoration_string(ID id, spv::Decoration decoration) const;
 
 	// Removes the decoration for an ID.
-	void unset_decoration(uint32_t id, spv::Decoration decoration);
+	void unset_decoration(ID id, spv::Decoration decoration);
 
 	// Gets the SPIR-V type associated with ID.
 	// Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource.
-	const SPIRType &get_type(uint32_t id) const;
+	const SPIRType &get_type(TypeID id) const;
 
 	// Gets the SPIR-V type of a variable.
-	const SPIRType &get_type_from_variable(uint32_t id) const;
+	const SPIRType &get_type_from_variable(VariableID id) const;
 
 	// Gets the underlying storage class for an OpVariable.
-	spv::StorageClass get_storage_class(uint32_t id) const;
+	spv::StorageClass get_storage_class(VariableID id) const;
 
 	// If get_name() is an empty string, get the fallback name which will be used
 	// instead in the disassembled source.
-	virtual const std::string get_fallback_name(uint32_t id) const;
+	virtual const std::string get_fallback_name(ID id) const;
 
 	// If get_name() of a Block struct is an empty string, get the fallback name.
 	// This needs to be per-variable as multiple variables can use the same block type.
-	virtual const std::string get_block_fallback_name(uint32_t id) const;
+	virtual const std::string get_block_fallback_name(VariableID id) const;
 
 	// Given an OpTypeStruct in ID, obtain the identifier for member number "index".
 	// This may be an empty string.
-	const std::string &get_member_name(uint32_t id, uint32_t index) const;
+	const std::string &get_member_name(TypeID id, uint32_t index) const;
 
 	// Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index".
-	uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
-	const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const;
+	uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+	const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const;
 
 	// Sets the member identifier for OpTypeStruct ID, member number "index".
-	void set_member_name(uint32_t id, uint32_t index, const std::string &name);
+	void set_member_name(TypeID id, uint32_t index, const std::string &name);
 
 	// Returns the qualified member identifier for OpTypeStruct ID, member number "index",
 	// or an empty string if no qualified alias exists
-	const std::string &get_member_qualified_name(uint32_t type_id, uint32_t index) const;
+	const std::string &get_member_qualified_name(TypeID type_id, uint32_t index) const;
 
 	// Gets the decoration mask for a member of a struct, similar to get_decoration_mask.
-	const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const;
+	const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const;
 
 	// Returns whether the decoration has been applied to a member of a struct.
-	bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
+	bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
 
 	// Similar to set_decoration, but for struct members.
-	void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
-	void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+	void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
+	void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
 	                                  const std::string &argument);
 
 	// Unsets a member decoration, similar to unset_decoration.
-	void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration);
+	void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration);
 
 	// Gets the fallback name for a member, similar to get_fallback_name.
 	virtual const std::string get_fallback_member_name(uint32_t index) const
@@ -237,7 +228,7 @@ public:
 	// SPIR-V shader. The granularity of this analysis is per-member of a struct.
 	// This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks.
 	// ID is the Resource::id obtained from get_shader_resources().
-	SmallVector<BufferRange> get_active_buffer_ranges(uint32_t id) const;
+	SmallVector<BufferRange> get_active_buffer_ranges(VariableID id) const;
 
 	// Returns the effective size of a buffer block.
 	size_t get_declared_struct_size(const SPIRType &struct_type) const;
@@ -255,7 +246,7 @@ public:
 	size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const;
 
 	// Returns the effective size of a buffer block struct member.
-	virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
+	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
 
 	// Returns a set of all global variables which are statically accessed
 	// by the control flow graph from the current entry point.
@@ -265,12 +256,12 @@ public:
 	//
 	// To use the returned set as the filter for which variables are used during compilation,
 	// this set can be moved to set_enabled_interface_variables().
-	std::unordered_set<uint32_t> get_active_interface_variables() const;
+	std::unordered_set<VariableID> get_active_interface_variables() const;
 
 	// Sets the interface variables which are used during compilation.
 	// By default, all variables are used.
 	// Once set, compile() will only consider the set in active_variables.
-	void set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables);
+	void set_enabled_interface_variables(std::unordered_set<VariableID> active_variables);
 
 	// Query shader resources, use ids with reflection interface to modify or query binding points, etc.
 	ShaderResources get_shader_resources() const;
@@ -278,19 +269,19 @@ public:
 	// Query shader resources, but only return the variables which are part of active_variables.
 	// E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically
 	// accessed.
-	ShaderResources get_shader_resources(const std::unordered_set<uint32_t> &active_variables) const;
+	ShaderResources get_shader_resources(const std::unordered_set<VariableID> &active_variables) const;
 
 	// Remapped variables are considered built-in variables and a backend will
 	// not emit a declaration for this variable.
 	// This is mostly useful for making use of builtins which are dependent on extensions.
-	void set_remapped_variable_state(uint32_t id, bool remap_enable);
-	bool get_remapped_variable_state(uint32_t id) const;
+	void set_remapped_variable_state(VariableID id, bool remap_enable);
+	bool get_remapped_variable_state(VariableID id) const;
 
 	// For subpassInput variables which are remapped to plain variables,
 	// the number of components in the remapped
 	// variable must be specified as the backing type of subpass inputs are opaque.
-	void set_subpass_input_remapped_components(uint32_t id, uint32_t components);
-	uint32_t get_subpass_input_remapped_components(uint32_t id) const;
+	void set_subpass_input_remapped_components(VariableID id, uint32_t components);
+	uint32_t get_subpass_input_remapped_components(VariableID id) const;
 
 	// All operations work on the current entry point.
 	// Entry points can be swapped out with set_entry_point().
@@ -371,7 +362,7 @@ public:
 	// If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile().
 	// Calling this function also invalidates get_active_interface_variables(), so this should be called
 	// before that function.
-	uint32_t build_dummy_sampler_for_combined_images();
+	VariableID build_dummy_sampler_for_combined_images();
 
 	// Analyzes all separate image and samplers used from the currently selected entry point,
 	// and re-routes them all to a combined image sampler instead.
@@ -420,8 +411,8 @@ public:
 	// constant_type is the SPIRType for the specialization constant,
 	// which can be queried to determine which fields in the unions should be poked at.
 	SmallVector<SpecializationConstant> get_specialization_constants() const;
-	SPIRConstant &get_constant(uint32_t id);
-	const SPIRConstant &get_constant(uint32_t id) const;
+	SPIRConstant &get_constant(ConstantID id);
+	const SPIRConstant &get_constant(ConstantID id) const;
 
 	uint32_t get_current_id_bound() const
 	{
@@ -444,7 +435,7 @@ public:
 	// If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true,
 	// otherwise, returns false.
 	// If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false.
-	bool get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const;
+	bool get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const;
 
 	// HLSL counter buffer reflection interface.
 	// Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where
@@ -459,7 +450,7 @@ public:
 	// only return true if OpSource was reported HLSL.
 	// To rely on this functionality, ensure that the SPIR-V module is not stripped.
 
-	bool buffer_is_hlsl_counter_buffer(uint32_t id) const;
+	bool buffer_is_hlsl_counter_buffer(VariableID id) const;
 
 	// Queries if a buffer object has a neighbor "counter" buffer.
 	// If so, the ID of that counter buffer will be returned in counter_id.
@@ -467,7 +458,7 @@ public:
 	// Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will
 	// only return true if OpSource was reported HLSL.
 	// To rely on this functionality, ensure that the SPIR-V module is not stripped.
-	bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const;
+	bool buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const;
 
 	// Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module.
 	const SmallVector<spv::Capability> &get_declared_capabilities() const;
@@ -488,13 +479,13 @@ public:
 	// ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type.
 	//
 	// This also applies to HLSL cbuffers.
-	std::string get_remapped_declared_block_name(uint32_t id) const;
+	std::string get_remapped_declared_block_name(VariableID id) const;
 
 	// For buffer block variables, get the decorations for that variable.
 	// Sometimes, decorations for buffer blocks are found in member decorations instead
 	// of direct decorations on the variable itself.
 	// The most common use here is to check if a buffer is readonly or writeonly.
-	Bitset get_buffer_block_flags(uint32_t id) const;
+	Bitset get_buffer_block_flags(VariableID id) const;
 
 protected:
 	const uint32_t *stream(const Instruction &instr) const
@@ -518,7 +509,7 @@ protected:
 
 	SPIRFunction *current_function = nullptr;
 	SPIRBlock *current_block = nullptr;
-	std::unordered_set<uint32_t> active_interface_variables;
+	std::unordered_set<VariableID> active_interface_variables;
 	bool check_active_interface_variables = false;
 
 	// If our IDs are out of range here as part of opcodes, throw instead of
@@ -558,7 +549,9 @@ protected:
 	template <typename T>
 	const T *maybe_get(uint32_t id) const
 	{
-		if (ir.ids[id].get_type() == static_cast<Types>(T::type))
+		if (id >= ir.ids.size())
+			return nullptr;
+		else if (ir.ids[id].get_type() == static_cast<Types>(T::type))
 			return &get<T>(id);
 		else
 			return nullptr;
@@ -614,6 +607,7 @@ protected:
 	bool expression_is_lvalue(uint32_t id) const;
 	bool variable_storage_is_aliased(const SPIRVariable &var);
 	SPIRVariable *maybe_get_backing_variable(uint32_t chain);
+	spv::StorageClass get_backing_variable_storage(uint32_t ptr);
 
 	void register_read(uint32_t expr, uint32_t chain, bool forwarded);
 	void register_write(uint32_t chain);
@@ -626,7 +620,7 @@ protected:
 	inline bool is_single_block_loop(uint32_t next) const
 	{
 		auto &block = get<SPIRBlock>(next);
-		return block.merge == SPIRBlock::MergeLoop && block.continue_block == next;
+		return block.merge == SPIRBlock::MergeLoop && block.continue_block == ID(next);
 	}
 
 	inline bool is_break(uint32_t next) const
@@ -666,9 +660,9 @@ protected:
 
 	bool function_is_pure(const SPIRFunction &func);
 	bool block_is_pure(const SPIRBlock &block);
-	bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to);
 
 	bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const;
+	bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const;
 	bool execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const;
 	SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const;
 
@@ -718,6 +712,13 @@ protected:
 		{
 		}
 
+		// Called after returning from a function or when entering a block,
+		// can be called multiple times per block,
+		// while set_current_block is only called on block entry.
+		virtual void rearm_current_block(const SPIRBlock &)
+		{
+		}
+
 		virtual bool begin_function_scope(const uint32_t *, uint32_t)
 		{
 			return true;
@@ -749,7 +750,7 @@ protected:
 
 	struct InterfaceVariableAccessHandler : OpcodeHandler
 	{
-		InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set<uint32_t> &variables_)
+		InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set<VariableID> &variables_)
 		    : compiler(compiler_)
 		    , variables(variables_)
 		{
@@ -758,7 +759,7 @@ protected:
 		bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
 
 		const Compiler &compiler;
-		std::unordered_set<uint32_t> &variables;
+		std::unordered_set<VariableID> &variables;
 	};
 
 	struct CombinedImageSamplerHandler : OpcodeHandler
@@ -780,8 +781,8 @@ protected:
 		uint32_t remap_parameter(uint32_t id);
 		void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length);
 		void pop_remap_parameters();
-		void register_combined_image_sampler(SPIRFunction &caller, uint32_t texture_id, uint32_t sampler_id,
-		                                     bool depth);
+		void register_combined_image_sampler(SPIRFunction &caller, VariableID combined_id, VariableID texture_id,
+		                                     VariableID sampler_id, bool depth);
 	};
 
 	struct DummySamplerForCombinedImageHandler : OpcodeHandler
@@ -814,7 +815,7 @@ protected:
 	// This must be an ordered data structure so we always pick the same type aliases.
 	SmallVector<uint32_t> global_struct_cache;
 
-	ShaderResources get_shader_resources(const std::unordered_set<uint32_t> *active_variables) const;
+	ShaderResources get_shader_resources(const std::unordered_set<VariableID> *active_variables) const;
 
 	VariableTypeRemapCallback variable_remap_callback;
 
@@ -822,7 +823,9 @@ protected:
 
 	std::unordered_set<uint32_t> forced_temporaries;
 	std::unordered_set<uint32_t> forwarded_temporaries;
+	std::unordered_set<uint32_t> suppressed_usage_tracking;
 	std::unordered_set<uint32_t> hoisted_temporaries;
+	std::unordered_set<uint32_t> forced_invariant_temporaries;
 
 	Bitset active_input_builtins;
 	Bitset active_output_builtins;
@@ -889,9 +892,12 @@ protected:
 
 	void build_function_control_flow_graphs_and_analyze();
 	std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs;
+	const CFG &get_cfg_for_current_function() const;
+	const CFG &get_cfg_for_function(uint32_t id) const;
+
 	struct CFGBuilder : OpcodeHandler
 	{
-		CFGBuilder(Compiler &compiler_);
+		explicit CFGBuilder(Compiler &compiler_);
 
 		bool follow_function_call(const SPIRFunction &func) override;
 		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
@@ -936,7 +942,7 @@ protected:
 
 	struct PhysicalStorageBufferPointerHandler : OpcodeHandler
 	{
-		PhysicalStorageBufferPointerHandler(Compiler &compiler_);
+		explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_);
 		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
 		Compiler &compiler;
 		std::unordered_set<uint32_t> types;
@@ -947,6 +953,62 @@ protected:
 	void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler);
 	void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler,
 	                              bool single_function);
+	bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
+
+	// Finds all resources that are accessed (loaded from or written to) inside the critical section, if present.
+	// The critical section is delimited by OpBeginInvocationInterlockEXT and
+	// OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written
+	// while inside the critical section must be placed in a raster order group.
+	struct InterlockedResourceAccessHandler : OpcodeHandler
+	{
+		InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id)
+		    : compiler(compiler_)
+		{
+			call_stack.push_back(entry_point_id); // The call stack starts out holding the entry point.
+		}
+
+		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+		bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+		bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+		Compiler &compiler;
+		bool in_crit_sec = false; // Set by handle() on OpBeginInvocationInterlockEXT when use_critical_section is on.
+
+		uint32_t interlock_function_id = 0; // Copied from the prepass handler.
+		bool split_function_case = false; // Copied from the prepass handler.
+		bool control_flow_interlock = false; // Copied from the prepass handler.
+		bool use_critical_section = false; // Set when neither of the two complex cases above applies.
+		bool call_stack_is_interlocked = false; // Set while traversing a call into interlock_function_id.
+		SmallVector<uint32_t> call_stack; // IDs of the functions currently being traversed, innermost last.
+
+		void access_potential_resource(uint32_t id);
+	};
+
+	struct InterlockedResourceAccessPrepassHandler : OpcodeHandler
+	{
+		InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id)
+		    : compiler(compiler_)
+		{
+			call_stack.push_back(entry_point_id); // The call stack starts out holding the entry point.
+		}
+
+		void rearm_current_block(const SPIRBlock &block) override; // Records the block being traversed.
+		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+		bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+		bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+		Compiler &compiler;
+		uint32_t interlock_function_id = 0; // Function in which an interlock instruction was found; 0 until then.
+		uint32_t current_block_id = 0; // Updated by rearm_current_block().
+		bool split_function_case = false; // Interlock instructions were found in more than one function.
+		bool control_flow_interlock = false; // An interlock instruction was found inside control flow.
+		SmallVector<uint32_t> call_stack; // IDs of the functions currently being traversed, innermost last.
+	};
+
+	void analyze_interlocked_resource_usage();
+	// The set of all resources accessed (read or written) while inside the critical section, if present.
+	std::unordered_set<uint32_t> interlocked_resources;
+	bool interlocked_is_complex = false;
 
 	void make_constant_null(uint32_t id, uint32_t type);
 
@@ -972,15 +1034,18 @@ protected:
 	void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration);
 
 	bool type_is_array_of_pointers(const SPIRType &type) const;
+	bool type_is_block_like(const SPIRType &type) const;
+	bool type_is_opaque_value(const SPIRType &type) const;
+
+	bool reflection_ssbo_instance_name_is_significant() const;
+	std::string get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const;
+
+	bool flush_phi_required(BlockID from, BlockID to) const;
 
 private:
 	// Used only to implement the old deprecated get_entry_point() interface.
 	const SPIREntryPoint &get_first_entry_point(const std::string &name) const;
 	SPIREntryPoint &get_first_entry_point(const std::string &name);
-
-	void fixup_type_alias();
-	bool type_is_block_like(const SPIRType &type) const;
-	bool type_is_opaque_value(const SPIRType &type) const;
 };
 } // namespace SPIRV_CROSS_NAMESPACE
 
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp
index d3352d9..f6e63b4 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp
@@ -33,6 +33,11 @@
 #if SPIRV_CROSS_C_API_REFLECT
 #include "spirv_reflect.hpp"
 #endif
+
+#ifdef HAVE_SPIRV_CROSS_GIT_VERSION
+#include "gitversion.h"
+#endif
+
 #include "spirv_parser.hpp"
 #include <memory>
 #include <new>
@@ -157,7 +162,7 @@ struct spvc_compiler_options_s : ScratchMemoryAllocation
 
 struct spvc_set_s : ScratchMemoryAllocation
 {
-	std::unordered_set<uint32_t> set;
+	std::unordered_set<VariableID> set;
 };
 
 // Dummy-inherit to we can keep our opaque type handle type safe in C-land as well,
@@ -412,6 +417,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
 	case SPVC_COMPILER_OPTION_FLIP_VERTEX_Y:
 		options->glsl.vertex.flip_vert_y = value != 0;
 		break;
+	case SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES:
+		options->glsl.emit_line_directives = value != 0;
+		break;
 
 	case SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE:
 		options->glsl.vertex.support_nonzero_base_instance = value != 0;
@@ -474,8 +482,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
 		options->msl.texel_buffer_texture_width = value;
 		break;
 
-	case SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX:
-		options->msl.aux_buffer_index = value;
+	case SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX:
+		options->msl.swizzle_buffer_index = value;
 		break;
 
 	case SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX:
@@ -533,6 +541,34 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
 	case SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE:
 		options->msl.texture_buffer_native = value != 0;
 		break;
+
+	case SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX:
+		options->msl.buffer_size_buffer_index = value;
+		break;
+
+	case SPVC_COMPILER_OPTION_MSL_MULTIVIEW:
+		options->msl.multiview = value != 0;
+		break;
+
+	case SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX:
+		options->msl.view_mask_buffer_index = value;
+		break;
+
+	case SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX:
+		options->msl.device_index = value;
+		break;
+
+	case SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX:
+		options->msl.view_index_from_device_index = value != 0;
+		break;
+
+	case SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE:
+		options->msl.dispatch_base = value != 0;
+		break;
+
+	case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX:
+		options->msl.dynamic_offsets_buffer_index = value;
+		break;
 #endif
 
 	default:
@@ -726,7 +762,7 @@ spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler)
 #endif
 }
 
-spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
+spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler)
 {
 #if SPIRV_CROSS_C_API_MSL
 	if (compiler->backend != SPVC_BACKEND_MSL)
@@ -736,13 +772,35 @@ spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
 	}
 
 	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
-	return msl.needs_aux_buffer() ? SPVC_TRUE : SPVC_FALSE;
+	return msl.needs_swizzle_buffer() ? SPVC_TRUE : SPVC_FALSE;
 #else
 	compiler->context->report_error("MSL function used on a non-MSL backend.");
 	return SPVC_FALSE;
 #endif
 }
 
+spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_FALSE;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	return msl.needs_buffer_size_buffer() ? SPVC_TRUE : SPVC_FALSE;
+#else
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_FALSE;
+#endif
+}
+
+spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
+{
+	return spvc_compiler_msl_needs_swizzle_buffer(compiler);
+}
+
 spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler)
 {
 #if SPIRV_CROSS_C_API_MSL
@@ -811,7 +869,7 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const
 	attr.msl_stride = va->msl_stride;
 	attr.format = static_cast<MSLVertexFormat>(va->format);
 	attr.builtin = static_cast<spv::BuiltIn>(va->builtin);
-	attr.per_instance = va->per_instance;
+	attr.per_instance = va->per_instance != 0;
 	msl.add_msl_vertex_attribute(attr);
 	return SPVC_SUCCESS;
 #else
@@ -848,6 +906,27 @@ spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler,
 #endif
 }
 
+spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	msl.add_dynamic_buffer(desc_set, binding, index);
+	return SPVC_SUCCESS;
+#else
+	(void)binding;
+	(void)desc_set;
+	(void)index;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
 spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set)
 {
 #if SPIRV_CROSS_C_API_MSL
@@ -867,6 +946,26 @@ spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler
 #endif
 }
 
+spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	msl.set_argument_buffer_device_address_space(desc_set, bool(device_address));
+	return SPVC_SUCCESS;
+#else
+	(void)desc_set;
+	(void)device_address;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
 spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location)
 {
 #if SPIRV_CROSS_C_API_MSL
@@ -907,38 +1006,140 @@ spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutio
 #endif
 }
 
-spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id,
-                                                      const spvc_msl_constexpr_sampler *sampler)
-{
 #if SPIRV_CROSS_C_API_MSL
-	if (compiler->backend != SPVC_BACKEND_MSL)
-	{
-		compiler->context->report_error("MSL function used on a non-MSL backend.");
-		return SPVC_ERROR_INVALID_ARGUMENT;
-	}
-
-	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
-	MSLConstexprSampler samp;
+static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_constexpr_sampler *sampler)
+{
 	samp.s_address = static_cast<MSLSamplerAddress>(sampler->s_address);
 	samp.t_address = static_cast<MSLSamplerAddress>(sampler->t_address);
 	samp.r_address = static_cast<MSLSamplerAddress>(sampler->r_address);
 	samp.lod_clamp_min = sampler->lod_clamp_min;
 	samp.lod_clamp_max = sampler->lod_clamp_max;
-	samp.lod_clamp_enable = sampler->lod_clamp_enable;
+	samp.lod_clamp_enable = sampler->lod_clamp_enable != 0;
 	samp.min_filter = static_cast<MSLSamplerFilter>(sampler->min_filter);
 	samp.mag_filter = static_cast<MSLSamplerFilter>(sampler->mag_filter);
 	samp.mip_filter = static_cast<MSLSamplerMipFilter>(sampler->mip_filter);
-	samp.compare_enable = sampler->compare_enable;
-	samp.anisotropy_enable = sampler->anisotropy_enable;
+	samp.compare_enable = sampler->compare_enable != 0;
+	samp.anisotropy_enable = sampler->anisotropy_enable != 0;
 	samp.max_anisotropy = sampler->max_anisotropy;
 	samp.compare_func = static_cast<MSLSamplerCompareFunc>(sampler->compare_func);
 	samp.coord = static_cast<MSLSamplerCoord>(sampler->coord);
 	samp.border_color = static_cast<MSLSamplerBorderColor>(sampler->border_color);
+}
+
+static void spvc_convert_msl_sampler_ycbcr_conversion(MSLConstexprSampler &samp, const spvc_msl_sampler_ycbcr_conversion *conv)
+{
+	samp.ycbcr_conversion_enable = conv != nullptr;
+	if (conv == nullptr) return;
+	samp.planes = conv->planes;
+	samp.resolution = static_cast<MSLFormatResolution>(conv->resolution);
+	samp.chroma_filter = static_cast<MSLSamplerFilter>(conv->chroma_filter);
+	samp.x_chroma_offset = static_cast<MSLChromaLocation>(conv->x_chroma_offset);
+	samp.y_chroma_offset = static_cast<MSLChromaLocation>(conv->y_chroma_offset);
+	for (int i = 0; i < 4; i++)
+		samp.swizzle[i] = static_cast<MSLComponentSwizzle>(conv->swizzle[i]);
+	samp.ycbcr_model = static_cast<MSLSamplerYCbCrModelConversion>(conv->ycbcr_model);
+	samp.ycbcr_range = static_cast<MSLSamplerYCbCrRange>(conv->ycbcr_range);
+	samp.bpc = conv->bpc;
+}
+#endif
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id,
+                                                      const spvc_msl_constexpr_sampler *sampler)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	MSLConstexprSampler samp;
+	spvc_convert_msl_sampler(samp, sampler);
+	msl.remap_constexpr_sampler(id, samp);
+	return SPVC_SUCCESS;
+#else
+	(void)id;
+	(void)sampler;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler,
+                                                                 unsigned desc_set, unsigned binding,
+                                                                 const spvc_msl_constexpr_sampler *sampler)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	MSLConstexprSampler samp;
+	spvc_convert_msl_sampler(samp, sampler);
+	msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp);
+	return SPVC_SUCCESS;
+#else
+	(void)desc_set;
+	(void)binding;
+	(void)sampler;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id,
+                                                            const spvc_msl_constexpr_sampler *sampler,
+                                                            const spvc_msl_sampler_ycbcr_conversion *conv)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	MSLConstexprSampler samp;
+	spvc_convert_msl_sampler(samp, sampler);
+	spvc_convert_msl_sampler_ycbcr_conversion(samp, conv);
 	msl.remap_constexpr_sampler(id, samp);
 	return SPVC_SUCCESS;
 #else
 	(void)id;
 	(void)sampler;
+	(void)conv;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler,
+                                                                       unsigned desc_set, unsigned binding,
+                                                                       const spvc_msl_constexpr_sampler *sampler,
+                                                                       const spvc_msl_sampler_ycbcr_conversion *conv)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return SPVC_ERROR_INVALID_ARGUMENT;
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	MSLConstexprSampler samp;
+	spvc_convert_msl_sampler(samp, sampler);
+	spvc_convert_msl_sampler_ycbcr_conversion(samp, conv);
+	msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp);
+	return SPVC_SUCCESS;
+#else
+	(void)desc_set;
+	(void)binding;
+	(void)sampler;
+	(void)conv;
 	compiler->context->report_error("MSL function used on a non-MSL backend.");
 	return SPVC_ERROR_INVALID_ARGUMENT;
 #endif
@@ -965,6 +1166,42 @@ spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compi
 #endif
 }
 
+unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return uint32_t(-1);
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	return msl.get_automatic_msl_resource_binding(id);
+#else
+	(void)id;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return uint32_t(-1);
+#endif
+}
+
+unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id)
+{
+#if SPIRV_CROSS_C_API_MSL
+	if (compiler->backend != SPVC_BACKEND_MSL)
+	{
+		compiler->context->report_error("MSL function used on a non-MSL backend.");
+		return uint32_t(-1);
+	}
+
+	auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+	return msl.get_automatic_msl_resource_binding_secondary(id);
+#else
+	(void)id;
+	compiler->context->report_error("MSL function used on a non-MSL backend.");
+	return uint32_t(-1);
+#endif
+}
+
 spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source)
 {
 	SPVC_BEGIN_SAFE_SCOPE
@@ -1276,6 +1513,11 @@ const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, s
 	    .c_str();
 }
 
+const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index)
+{
+	return compiler->compiler->get_member_name(id, member_index).c_str();
+}
+
 spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, const spvc_entry_point **entry_points,
                                            size_t *num_entry_points)
 {
@@ -1413,7 +1655,7 @@ unsigned spvc_type_get_bit_width(spvc_type type)
 	return type->width;
 }
 
-unsigned spvc_type_get_SmallVector_size(spvc_type type)
+unsigned spvc_type_get_vector_size(spvc_type type)
 {
 	return type->vecsize;
 }
@@ -1516,6 +1758,16 @@ spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler c
 	return SPVC_SUCCESS;
 }
 
+spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size)
+{
+	SPVC_BEGIN_SAFE_SCOPE
+	{
+		*size = compiler->compiler->get_declared_struct_member_size(*static_cast<const SPIRType *>(struct_type), index);
+	}
+	SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT)
+	return SPVC_SUCCESS;
+}
+
 spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset)
 {
 	SPVC_BEGIN_SAFE_SCOPE
@@ -1643,6 +1895,32 @@ spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc
 	return ret;
 }
 
+spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler,
+                                                   spvc_variable_id id,
+                                                   const spvc_buffer_range **ranges,
+                                                   size_t *num_ranges)
+{
+	SPVC_BEGIN_SAFE_SCOPE
+	{
+		auto active_ranges = compiler->compiler->get_active_buffer_ranges(id);
+		SmallVector<spvc_buffer_range> translated;
+		translated.reserve(active_ranges.size());
+		for (auto &r : active_ranges)
+		{
+			spvc_buffer_range trans = { r.index, r.offset, r.range };
+			translated.push_back(trans);
+		}
+
+		auto ptr = spvc_allocate<TemporaryBuffer<spvc_buffer_range>>();
+		ptr->buffer = std::move(translated);
+		*ranges = ptr->buffer.data();
+		*num_ranges = ptr->buffer.size();
+		compiler->context->allocations.push_back(std::move(ptr));
+	}
+	SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY)
+	return SPVC_SUCCESS;
+}
+
 float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row)
 {
 	return constant->scalar_f16(column, row);
@@ -1854,6 +2132,28 @@ void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler)
 #endif
 }
 
+// Fills *conv with the Y'CbCr conversion defaults of a default-constructed MSLConstexprSampler.
+// When MSL support is not compiled in, the struct is zero-filled instead.
+void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv)
+{
+#if SPIRV_CROSS_C_API_MSL
+	MSLConstexprSampler defaults;
+	conv->planes = defaults.planes;
+	conv->resolution = static_cast<spvc_msl_format_resolution>(defaults.resolution);
+	conv->chroma_filter = static_cast<spvc_msl_sampler_filter>(defaults.chroma_filter);
+	conv->x_chroma_offset = static_cast<spvc_msl_chroma_location>(defaults.x_chroma_offset);
+	conv->y_chroma_offset = static_cast<spvc_msl_chroma_location>(defaults.y_chroma_offset);
+	for (int i = 0; i < 4; i++)
+		conv->swizzle[i] = static_cast<spvc_msl_component_swizzle>(defaults.swizzle[i]);
+	conv->ycbcr_model = static_cast<spvc_msl_sampler_ycbcr_model_conversion>(defaults.ycbcr_model);
+	conv->ycbcr_range = static_cast<spvc_msl_sampler_ycbcr_range>(defaults.ycbcr_range);
+	// bpc is read back by spvc_convert_msl_sampler_ycbcr_conversion(), so it must not be left indeterminate.
+	conv->bpc = defaults.bpc;
+#else
+	memset(conv, 0, sizeof(*conv));
+#endif
+}
+
 unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler)
 {
 	return compiler->compiler->get_current_id_bound();
@@ -1866,6 +2166,15 @@ void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch)
 	*patch = SPVC_C_API_VERSION_PATCH;
 }
 
+const char *spvc_get_commit_revision_and_timestamp(void)
+{
+#ifdef HAVE_SPIRV_CROSS_GIT_VERSION
+	return SPIRV_CROSS_GIT_REVISION;
+#else
+	return "";
+#endif
+}
+
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h
index 9e10d07..f950803 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h
@@ -33,7 +33,7 @@ extern "C" {
 /* Bumped if ABI or API breaks backwards compatibility. */
 #define SPVC_C_API_VERSION_MAJOR 0
 /* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 7
+#define SPVC_C_API_VERSION_MINOR 19
 /* Bumped if internal implementation details change. */
 #define SPVC_C_API_VERSION_PATCH 0
 
@@ -58,6 +58,9 @@ extern "C" {
  */
 SPVC_PUBLIC_API void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch);
 
+/* Gets a human readable version string to identify which commit a particular binary was created from. */
+SPVC_PUBLIC_API const char *spvc_get_commit_revision_and_timestamp(void);
+
 /* These types are opaque to the user. */
 typedef struct spvc_context_s *spvc_context;
 typedef struct spvc_parsed_ir_s *spvc_parsed_ir;
@@ -112,6 +115,14 @@ typedef struct spvc_specialization_constant
 } spvc_specialization_constant;
 
 /* See C++ API. */
+typedef struct spvc_buffer_range
+{
+	unsigned index;
+	size_t offset;
+	size_t range;
+} spvc_buffer_range;
+
+/* See C++ API. */
 typedef struct spvc_hlsl_root_constants
 {
 	unsigned start;
@@ -290,9 +301,14 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b
 
 #define SPVC_MSL_PUSH_CONSTANT_DESC_SET (~(0u))
 #define SPVC_MSL_PUSH_CONSTANT_BINDING (0)
+#define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u))
+#define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u))
+#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u))
+
+/* Obsolete. Sticks around for backwards compatibility. */
 #define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1
 
-/* Runtime check for incompatibility. */
+/* Runtime check for incompatibility. Obsolete. */
 SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void);
 
 /* Maps to C++ API. */
@@ -355,6 +371,55 @@ typedef enum spvc_msl_sampler_border_color
 } spvc_msl_sampler_border_color;
 
 /* Maps to C++ API. */
+typedef enum spvc_msl_format_resolution
+{
+	SPVC_MSL_FORMAT_RESOLUTION_444 = 0,
+	SPVC_MSL_FORMAT_RESOLUTION_422,
+	SPVC_MSL_FORMAT_RESOLUTION_420,
+	SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff
+} spvc_msl_format_resolution;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_chroma_location
+{
+	SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0,
+	SPVC_MSL_CHROMA_LOCATION_MIDPOINT,
+	SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff
+} spvc_msl_chroma_location;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_component_swizzle
+{
+	SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0,
+	SPVC_MSL_COMPONENT_SWIZZLE_ZERO,
+	SPVC_MSL_COMPONENT_SWIZZLE_ONE,
+	SPVC_MSL_COMPONENT_SWIZZLE_R,
+	SPVC_MSL_COMPONENT_SWIZZLE_G,
+	SPVC_MSL_COMPONENT_SWIZZLE_B,
+	SPVC_MSL_COMPONENT_SWIZZLE_A,
+	SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff
+} spvc_msl_component_swizzle;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_sampler_ycbcr_model_conversion
+{
+	SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0,
+	SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY,
+	SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709,
+	SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601,
+	SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020,
+	SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff
+} spvc_msl_sampler_ycbcr_model_conversion;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_sampler_ycbcr_range
+{
+	SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0,
+	SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW,
+	SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff
+} spvc_msl_sampler_ycbcr_range;
+
+/* Maps to C++ API. */
 typedef struct spvc_msl_constexpr_sampler
 {
 	spvc_msl_sampler_coord coord;
@@ -381,6 +446,26 @@ typedef struct spvc_msl_constexpr_sampler
  */
 SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler);
 
+/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. */
+typedef struct spvc_msl_sampler_ycbcr_conversion
+{
+	unsigned planes;
+	spvc_msl_format_resolution resolution;
+	spvc_msl_sampler_filter chroma_filter;
+	spvc_msl_chroma_location x_chroma_offset;
+	spvc_msl_chroma_location y_chroma_offset;
+	spvc_msl_component_swizzle swizzle[4];
+	spvc_msl_sampler_ycbcr_model_conversion ycbcr_model;
+	spvc_msl_sampler_ycbcr_range ycbcr_range;
+	unsigned bpc;
+} spvc_msl_sampler_ycbcr_conversion;
+
+/*
+ * Initializes the sampler Y'CbCr conversion struct.
+ * The defaults are non-zero.
+ */
+SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv);
+
 /* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */
 typedef enum spvc_compiler_option
 {
@@ -407,7 +492,11 @@ typedef enum spvc_compiler_option
 
 	SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT,
 	SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT,
+
+	/* Obsolete, use SWIZZLE_BUFFER_INDEX instead. */
 	SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT,
+	SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT,
+
 	SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT,
 	SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT,
 	SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT,
@@ -428,6 +517,17 @@ typedef enum spvc_compiler_option
 
 	SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS = 35 | SPVC_COMPILER_OPTION_GLSL_BIT,
 
+	SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX = 36 | SPVC_COMPILER_OPTION_MSL_BIT,
+
+	SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT,
+
+	SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT,
+	SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT,
+	SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT,
+	SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT,
+	SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT,
+	SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT,
+
 	SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
 } spvc_compiler_option;
 
@@ -505,7 +605,12 @@ SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin
  * Maps to C++ API.
  */
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler);
+
+/* Obsolete. Renamed to needs_swizzle_buffer. */
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler);
+
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler);
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler);
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler);
@@ -514,14 +619,23 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler,
                                                                    const spvc_msl_resource_binding *binding);
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address);
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location);
 SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler,
                                                              SpvExecutionModel model,
                                                              unsigned set,
                                                              unsigned binding);
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv);
 SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components);
 
+SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id);
+SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id);
+
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index);
+
 /*
  * Reflect resources.
  * Maps almost 1:1 to C++ API.
@@ -567,6 +681,7 @@ SPVC_PUBLIC_API unsigned spvc_compiler_get_member_decoration(spvc_compiler compi
                                                              unsigned member_index, SpvDecoration decoration);
 SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id,
                                                                        unsigned member_index, SpvDecoration decoration);
+SPVC_PUBLIC_API const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index);
 
 /*
  * Entry points.
@@ -626,6 +741,7 @@ SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_typ
 SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size);
 SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler,
                                                                                  spvc_type struct_type, size_t array_size, size_t *size);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size);
 
 SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler,
                                                                     spvc_type type, unsigned index, unsigned *offset);
@@ -660,6 +776,15 @@ SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specializatio
                                                                                             spvc_specialization_constant *z);
 
 /*
+ * Buffer ranges
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler,
+                                                                   spvc_variable_id id,
+                                                                   const spvc_buffer_range **ranges,
+                                                                   size_t *num_ranges);
+
+/*
  * No stdint.h until C99, sigh :(
  * For smaller types, the result is sign or zero-extended as appropriate.
  * Maps to C++ API.
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp
index 393f461..7b507e1 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp
@@ -61,7 +61,8 @@ public:
 private:
 #if defined(_MSC_VER) && _MSC_VER < 1900
 	// MSVC 2013 workarounds, sigh ...
-	union {
+	union
+	{
 		char aligned_char[sizeof(T) * N];
 		double dummy_aligner;
 	} u;
@@ -447,6 +448,11 @@ public:
 		}
 	}
 
+	void insert(T *itr, const T &value)
+	{
+		insert(itr, &value, &value + 1);
+	}
+
 	T *erase(T *itr)
 	{
 		std::move(itr + 1, this->end(), itr);
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp
index 108000c..2387267 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp
@@ -41,6 +41,7 @@ ParsedIR::ParsedIR()
 	pool_group->pools[TypeCombinedImageSampler].reset(new ObjectPool<SPIRCombinedImageSampler>);
 	pool_group->pools[TypeAccessChain].reset(new ObjectPool<SPIRAccessChain>);
 	pool_group->pools[TypeUndef].reset(new ObjectPool<SPIRUndef>);
+	pool_group->pools[TypeString].reset(new ObjectPool<SPIRString>);
 }
 
 // Should have been default-implemented, but need this on MSVC 2013.
@@ -71,7 +72,8 @@ ParsedIR &ParsedIR::operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT
 
 		default_entry_point = other.default_entry_point;
 		source = other.source;
-		loop_iteration_depth = other.loop_iteration_depth;
+		loop_iteration_depth_hard = other.loop_iteration_depth_hard;
+		loop_iteration_depth_soft = other.loop_iteration_depth_soft;
 	}
 	return *this;
 }
@@ -99,7 +101,8 @@ ParsedIR &ParsedIR::operator=(const ParsedIR &other)
 		entry_points = other.entry_points;
 		default_entry_point = other.default_entry_point;
 		source = other.source;
-		loop_iteration_depth = other.loop_iteration_depth;
+		loop_iteration_depth_hard = other.loop_iteration_depth_hard;
+		loop_iteration_depth_soft = other.loop_iteration_depth_soft;
 		addressing_model = other.addressing_model;
 		memory_model = other.memory_model;
 
@@ -159,7 +162,7 @@ static string ensure_valid_identifier(const string &name, bool member)
 	return str;
 }
 
-const string &ParsedIR::get_name(uint32_t id) const
+const string &ParsedIR::get_name(ID id) const
 {
 	auto *m = find_meta(id);
 	if (m)
@@ -168,7 +171,7 @@ const string &ParsedIR::get_name(uint32_t id) const
 		return empty_string;
 }
 
-const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const
+const string &ParsedIR::get_member_name(TypeID id, uint32_t index) const
 {
 	auto *m = find_meta(id);
 	if (m)
@@ -181,7 +184,7 @@ const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const
 		return empty_string;
 }
 
-void ParsedIR::set_name(uint32_t id, const string &name)
+void ParsedIR::set_name(ID id, const string &name)
 {
 	auto &str = meta[id].decoration.alias;
 	str.clear();
@@ -196,7 +199,7 @@ void ParsedIR::set_name(uint32_t id, const string &name)
 	str = ensure_valid_identifier(name, false);
 }
 
-void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name)
+void ParsedIR::set_member_name(TypeID id, uint32_t index, const string &name)
 {
 	meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1));
 
@@ -212,7 +215,7 @@ void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name)
 	str = ensure_valid_identifier(name, true);
 }
 
-void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const string &argument)
+void ParsedIR::set_decoration_string(ID id, Decoration decoration, const string &argument)
 {
 	auto &dec = meta[id].decoration;
 	dec.decoration_flags.set(decoration);
@@ -228,7 +231,7 @@ void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const s
 	}
 }
 
-void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argument)
+void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
 {
 	auto &dec = meta[id].decoration;
 	dec.decoration_flags.set(decoration);
@@ -294,7 +297,7 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum
 	}
 }
 
-void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument)
+void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
 {
 	meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1));
 	auto &dec = meta[id].members[index];
@@ -342,7 +345,7 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec
 
 // Recursively marks any constants referenced by the specified constant instruction as being used
 // as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp).
-void ParsedIR::mark_used_as_array_length(uint32_t id)
+void ParsedIR::mark_used_as_array_length(ID id)
 {
 	switch (ids[id].get_type())
 	{
@@ -353,8 +356,16 @@ void ParsedIR::mark_used_as_array_length(uint32_t id)
 	case TypeConstantOp:
 	{
 		auto &cop = get<SPIRConstantOp>(id);
-		for (uint32_t arg_id : cop.arguments)
-			mark_used_as_array_length(arg_id);
+		if (cop.opcode == OpCompositeExtract)
+			mark_used_as_array_length(cop.arguments[0]);
+		else if (cop.opcode == OpCompositeInsert)
+		{
+			mark_used_as_array_length(cop.arguments[0]);
+			mark_used_as_array_length(cop.arguments[1]);
+		}
+		else
+			for (uint32_t arg_id : cop.arguments)
+				mark_used_as_array_length(arg_id);
 		break;
 	}
 
@@ -390,7 +401,7 @@ Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const
 	return base_flags;
 }
 
-const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index) const
+const Bitset &ParsedIR::get_member_decoration_bitset(TypeID id, uint32_t index) const
 {
 	auto *m = find_meta(id);
 	if (m)
@@ -403,12 +414,12 @@ const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index
 		return cleared_bitset;
 }
 
-bool ParsedIR::has_decoration(uint32_t id, Decoration decoration) const
+bool ParsedIR::has_decoration(ID id, Decoration decoration) const
 {
 	return get_decoration_bitset(id).get(decoration);
 }
 
-uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const
+uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
 {
 	auto *m = find_meta(id);
 	if (!m)
@@ -449,7 +460,7 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const
 	}
 }
 
-const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration) const
+const string &ParsedIR::get_decoration_string(ID id, Decoration decoration) const
 {
 	auto *m = find_meta(id);
 	if (!m)
@@ -470,7 +481,7 @@ const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration
 	}
 }
 
-void ParsedIR::unset_decoration(uint32_t id, Decoration decoration)
+void ParsedIR::unset_decoration(ID id, Decoration decoration)
 {
 	auto &dec = meta[id].decoration;
 	dec.decoration_flags.clear(decoration);
@@ -532,12 +543,12 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration)
 	}
 }
 
-bool ParsedIR::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+bool ParsedIR::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
 {
 	return get_member_decoration_bitset(id, index).get(decoration);
 }
 
-uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
 {
 	auto *m = find_meta(id);
 	if (!m)
@@ -571,7 +582,7 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration
 	}
 }
 
-const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const
+const Bitset &ParsedIR::get_decoration_bitset(ID id) const
 {
 	auto *m = find_meta(id);
 	if (m)
@@ -583,7 +594,7 @@ const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const
 		return cleared_bitset;
 }
 
-void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration, const string &argument)
+void ParsedIR::set_member_decoration_string(TypeID id, uint32_t index, Decoration decoration, const string &argument)
 {
 	meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1));
 	auto &dec = meta[id].members[index];
@@ -600,7 +611,7 @@ void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decorat
 	}
 }
 
-const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const
+const string &ParsedIR::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
 {
 	auto *m = find_meta(id);
 	if (m)
@@ -623,7 +634,7 @@ const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index
 		return empty_string;
 }
 
-void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration)
+void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
 {
 	auto &m = meta[id];
 	if (index >= m.members.size())
@@ -676,7 +687,7 @@ uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount)
 	return uint32_t(curr_bound);
 }
 
-void ParsedIR::remove_typed_id(Types type, uint32_t id)
+void ParsedIR::remove_typed_id(Types type, ID id)
 {
 	auto &type_ids = ids_for_type[type];
 	type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids));
@@ -691,11 +702,18 @@ void ParsedIR::reset_all_of_type(Types type)
 	ids_for_type[type].clear();
 }
 
-void ParsedIR::add_typed_id(Types type, uint32_t id)
+void ParsedIR::add_typed_id(Types type, ID id)
 {
-	if (loop_iteration_depth)
+	if (loop_iteration_depth_hard != 0)
 		SPIRV_CROSS_THROW("Cannot add typed ID while looping over it.");
 
+	if (loop_iteration_depth_soft != 0)
+	{
+		if (!ids[id].empty())
+			SPIRV_CROSS_THROW("Cannot override IDs when loop is soft locked.");
+		return;
+	}
+
 	if (ids[id].empty() || ids[id].get_type() != type)
 	{
 		switch (type)
@@ -730,7 +748,7 @@ void ParsedIR::add_typed_id(Types type, uint32_t id)
 	}
 }
 
-const Meta *ParsedIR::find_meta(uint32_t id) const
+const Meta *ParsedIR::find_meta(ID id) const
 {
 	auto itr = meta.find(id);
 	if (itr != end(meta))
@@ -739,7 +757,7 @@ const Meta *ParsedIR::find_meta(uint32_t id) const
 		return nullptr;
 }
 
-Meta *ParsedIR::find_meta(uint32_t id)
+Meta *ParsedIR::find_meta(ID id)
 {
 	auto itr = meta.find(id);
 	if (itr != end(meta))
@@ -748,4 +766,41 @@ Meta *ParsedIR::find_meta(uint32_t id)
 		return nullptr;
 }
 
+ParsedIR::LoopLock ParsedIR::create_loop_hard_lock() const
+{
+	return ParsedIR::LoopLock(&loop_iteration_depth_hard);
+}
+
+ParsedIR::LoopLock ParsedIR::create_loop_soft_lock() const
+{
+	return ParsedIR::LoopLock(&loop_iteration_depth_soft);
+}
+
+ParsedIR::LoopLock::~LoopLock()
+{
+	if (lock)
+		(*lock)--;
+}
+
+ParsedIR::LoopLock::LoopLock(uint32_t *lock_)
+    : lock(lock_)
+{
+	if (lock)
+		(*lock)++;
+}
+
+ParsedIR::LoopLock::LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT
+{
+	*this = move(other);
+}
+
+ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT
+{
+	if (lock)
+		(*lock)--;
+	lock = other.lock;
+	other.lock = nullptr;
+	return *this;
+}
+
 } // namespace SPIRV_CROSS_NAMESPACE
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp
index 79e9e15..97bc9ea 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp
@@ -57,19 +57,19 @@ public:
 	SmallVector<Variant> ids;
 
 	// Various meta data for IDs, decorations, names, etc.
-	std::unordered_map<uint32_t, Meta> meta;
+	std::unordered_map<ID, Meta> meta;
 
 	// Holds all IDs which have a certain type.
 	// This is needed so we can iterate through a specific kind of resource quickly,
 	// and in-order of module declaration.
-	SmallVector<uint32_t> ids_for_type[TypeCount];
+	SmallVector<ID> ids_for_type[TypeCount];
 
 	// Special purpose lists which contain a union of types.
 	// This is needed so we can declare specialization constants and structs in an interleaved fashion,
 	// among other things.
 	// Constants can be of struct type, and struct array sizes can use specialization constants.
-	SmallVector<uint32_t> ids_for_constant_or_type;
-	SmallVector<uint32_t> ids_for_constant_or_variable;
+	SmallVector<ID> ids_for_constant_or_type;
+	SmallVector<ID> ids_for_constant_or_variable;
 
 	// Declared capabilities and extensions in the SPIR-V module.
 	// Not really used except for reflection at the moment.
@@ -88,12 +88,12 @@ public:
 	};
 	using BlockMetaFlags = uint8_t;
 	SmallVector<BlockMetaFlags> block_meta;
-	std::unordered_map<uint32_t, uint32_t> continue_block_to_loop_header;
+	std::unordered_map<BlockID, BlockID> continue_block_to_loop_header;
 
 	// Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction.
 	// Entry points can therefore be seen as some sort of meta structure.
-	std::unordered_map<uint32_t, SPIREntryPoint> entry_points;
-	uint32_t default_entry_point = 0;
+	std::unordered_map<FunctionID, SPIREntryPoint> entry_points;
+	FunctionID default_entry_point = 0;
 
 	struct Source
 	{
@@ -114,50 +114,75 @@ public:
 	// Can be useful for simple "raw" reflection.
 	// However, most members are here because the Parser needs most of these,
 	// and might as well just have the whole suite of decoration/name handling in one place.
-	void set_name(uint32_t id, const std::string &name);
-	const std::string &get_name(uint32_t id) const;
-	void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0);
-	void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument);
-	bool has_decoration(uint32_t id, spv::Decoration decoration) const;
-	uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const;
-	const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const;
-	const Bitset &get_decoration_bitset(uint32_t id) const;
-	void unset_decoration(uint32_t id, spv::Decoration decoration);
+	void set_name(ID id, const std::string &name);
+	const std::string &get_name(ID id) const;
+	void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0);
+	void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument);
+	bool has_decoration(ID id, spv::Decoration decoration) const;
+	uint32_t get_decoration(ID id, spv::Decoration decoration) const;
+	const std::string &get_decoration_string(ID id, spv::Decoration decoration) const;
+	const Bitset &get_decoration_bitset(ID id) const;
+	void unset_decoration(ID id, spv::Decoration decoration);
 
 	// Decoration handling methods (for members of a struct).
-	void set_member_name(uint32_t id, uint32_t index, const std::string &name);
-	const std::string &get_member_name(uint32_t id, uint32_t index) const;
-	void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
-	void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+	void set_member_name(TypeID id, uint32_t index, const std::string &name);
+	const std::string &get_member_name(TypeID id, uint32_t index) const;
+	void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
+	void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
 	                                  const std::string &argument);
-	uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
-	const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const;
-	bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
-	const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const;
-	void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration);
+	uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+	const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const;
+	bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+	const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const;
+	void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration);
 
-	void mark_used_as_array_length(uint32_t id);
+	void mark_used_as_array_length(ID id);
 	uint32_t increase_bound_by(uint32_t count);
 	Bitset get_buffer_block_flags(const SPIRVariable &var) const;
 
-	void add_typed_id(Types type, uint32_t id);
-	void remove_typed_id(Types type, uint32_t id);
+	void add_typed_id(Types type, ID id);
+	void remove_typed_id(Types type, ID id);
+
+	class LoopLock
+	{
+	public:
+		explicit LoopLock(uint32_t *counter);
+		LoopLock(const LoopLock &) = delete;
+		void operator=(const LoopLock &) = delete;
+		LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT;
+		LoopLock &operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT;
+		~LoopLock();
+
+	private:
+		uint32_t *lock;
+	};
+
+	// This must be held while iterating over a type ID array.
+	// It is undefined if someone calls set<>() while we're iterating over a data structure, so we must
+	// make sure that this case is avoided.
+
+	// If we have a hard lock, it is an error to call set<>(), and an exception is thrown.
+	// If we have a soft lock, we silently ignore any additions to the typed arrays.
+	// This should only be used for physical ID remapping where we need to create an ID, but we will never
+	// care about iterating over them.
+	LoopLock create_loop_hard_lock() const;
+	LoopLock create_loop_soft_lock() const;
 
 	template <typename T, typename Op>
 	void for_each_typed_id(const Op &op)
 	{
-		loop_iteration_depth++;
+		auto loop_lock = create_loop_hard_lock();
 		for (auto &id : ids_for_type[T::type])
 		{
 			if (ids[id].get_type() == static_cast<Types>(T::type))
 				op(id, get<T>(id));
 		}
-		loop_iteration_depth--;
 	}
 
 	template <typename T, typename Op>
 	void for_each_typed_id(const Op &op) const
 	{
+		auto loop_lock = create_loop_hard_lock();
 		for (auto &id : ids_for_type[T::type])
 		{
 			if (ids[id].get_type() == static_cast<Types>(T::type))
@@ -173,8 +198,8 @@ public:
 
 	void reset_all_of_type(Types type);
 
-	Meta *find_meta(uint32_t id);
-	const Meta *find_meta(uint32_t id) const;
+	Meta *find_meta(ID id);
+	const Meta *find_meta(ID id) const;
 
 	const std::string &get_empty_string() const
 	{
@@ -194,7 +219,8 @@ private:
 		return variant_get<T>(ids[id]);
 	}
 
-	uint32_t loop_iteration_depth = 0;
+	mutable uint32_t loop_iteration_depth_hard = 0;
+	mutable uint32_t loop_iteration_depth_soft = 0;
 	std::string empty_string;
 	Bitset cleared_bitset;
 };
diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp
index 32582fb..bcdcd5f 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp
@@ -288,8 +288,17 @@ static uint32_t pls_format_to_components(PlsFormat format)
 
 static const char *vector_swizzle(int vecsize, int index)
 {
-	static const char *swizzle[4][4] = {
-		{ ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw" }, { ".xyz", ".yzw" }, { "" }
+	static const char *const swizzle[4][4] = {
+		{ ".x", ".y", ".z", ".w" },
+		{ ".xy", ".yz", ".zw", nullptr },
+		{ ".xyz", ".yzw", nullptr, nullptr },
+#if defined(__GNUC__) && (__GNUC__ == 9)
+		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
+		// This array ends up being compiled as all nullptrs, tripping the assertions below.
+		{ "", nullptr, nullptr, "$" },
+#else
+		{ "", nullptr, nullptr, nullptr },
+#endif
 	};
 
 	assert(vecsize >= 1 && vecsize <= 4);
@@ -313,6 +322,7 @@ void CompilerGLSL::reset()
 	// Clear temporary usage tracking.
 	expression_usage_counts.clear();
 	forwarded_temporaries.clear();
+	suppressed_usage_tracking.clear();
 
 	reset_name_caches();
 
@@ -494,11 +504,14 @@ string CompilerGLSL::compile()
 	backend.supports_extensions = true;
 
 	// Scan the SPIR-V to find trivial uses of extensions.
+	fixup_type_alias();
+	reorder_type_alias();
 	build_function_control_flow_graphs_and_analyze();
 	find_static_extensions();
 	fixup_image_load_store_access();
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
+	analyze_interlocked_resource_usage();
 
 	// Shaders might cast unrelated data to pointers of non-block types.
 	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
@@ -523,6 +536,25 @@ string CompilerGLSL::compile()
 		pass_count++;
 	} while (is_forcing_recompilation());
 
+	// Implement the interlocked wrapper function at the end.
+	// The body was implemented in lieu of main().
+	if (interlocked_is_complex)
+	{
+		statement("void main()");
+		begin_scope();
+		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
+		if (options.es)
+			statement("beginInvocationInterlockNV();");
+		else
+			statement("beginInvocationInterlockARB();");
+		statement("spvMainInterlockedBody();");
+		if (options.es)
+			statement("endInvocationInterlockNV();");
+		else
+			statement("endInvocationInterlockARB();");
+		end_scope();
+	}
+
 	// Entry point in GLSL is always main().
 	get_entry_point().name = "main";
 
@@ -589,6 +621,30 @@ void CompilerGLSL::emit_header()
 			require_extension_internal("GL_ARB_shader_image_load_store");
 	}
 
+	// Needed for: layout(post_depth_coverage) in;
+	if (execution.flags.get(ExecutionModePostDepthCoverage))
+		require_extension_internal("GL_ARB_post_depth_coverage");
+
+	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
+	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+	{
+		if (options.es)
+		{
+			if (options.version < 310)
+				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
+			require_extension_internal("GL_NV_fragment_shader_interlock");
+		}
+		else
+		{
+			if (options.version < 420)
+				require_extension_internal("GL_ARB_shader_image_load_store");
+			require_extension_internal("GL_ARB_fragment_shader_interlock");
+		}
+	}
+
 	for (auto &ext : forced_extensions)
 	{
 		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
@@ -624,6 +680,19 @@ void CompilerGLSL::emit_header()
 				statement("#endif");
 			}
 		}
+		else if (ext == "GL_ARB_post_depth_coverage")
+		{
+			if (options.es)
+				statement("#extension GL_EXT_post_depth_coverage : require");
+			else
+			{
+				statement("#if defined(GL_ARB_post_depth_coverge)");
+				statement("#extension GL_ARB_post_depth_coverage : require");
+				statement("#else");
+				statement("#extension GL_EXT_post_depth_coverage : require");
+				statement("#endif");
+			}
+		}
 		else
 			statement("#extension ", ext, " : require");
 	}
@@ -698,7 +767,8 @@ void CompilerGLSL::emit_header()
 
 			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
 			// declarations before we can emit the work group size.
-			if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0)))
+			if (options.vulkan_semantics ||
+			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
 				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
 		}
 		else
@@ -752,6 +822,17 @@ void CompilerGLSL::emit_header()
 
 		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
 			inputs.push_back("early_fragment_tests");
+		if (execution.flags.get(ExecutionModePostDepthCoverage))
+			inputs.push_back("post_depth_coverage");
+
+		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
+			inputs.push_back("pixel_interlock_ordered");
+		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
+			inputs.push_back("pixel_interlock_unordered");
+		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
+			inputs.push_back("sample_interlock_ordered");
+		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+			inputs.push_back("sample_interlock_unordered");
 
 		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
 			statement("layout(depth_greater) out float gl_FragDepth;");
@@ -784,7 +865,8 @@ void CompilerGLSL::emit_struct(SPIRType &type)
 	// Type-punning with these types is legal, which complicates things
 	// when we are storing struct and array types in an SSBO for example.
 	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
-	if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
+	if (type.type_alias != TypeID(0) &&
+	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
 		return;
 
 	add_resource_name(type.self);
@@ -812,6 +894,9 @@ void CompilerGLSL::emit_struct(SPIRType &type)
 		emitted = true;
 	}
 
+	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
+		emit_struct_padding_target(type);
+
 	end_scope_decl();
 
 	if (emitted)
@@ -821,8 +906,6 @@ void CompilerGLSL::emit_struct(SPIRType &type)
 string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
 {
 	string res;
-	if (flags.get(DecorationNonUniformEXT))
-		res += "nonuniformEXT ";
 	//if (flags & (1ull << DecorationSmooth))
 	//    res += "smooth ";
 	if (flags.get(DecorationFlat))
@@ -902,7 +985,8 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
 
 	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
 	// This is only done selectively in GLSL as needed.
-	if (has_extended_decoration(type.self, SPIRVCrossDecorationPacked) && dec.decoration_flags.get(DecorationOffset))
+	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
+	    dec.decoration_flags.get(DecorationOffset))
 		attr.push_back(join("offset = ", dec.offset));
 
 	if (attr.empty())
@@ -1322,14 +1406,20 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
 		// Only care about packing if we are in the given range
 		if (offset >= start_offset)
 		{
+			uint32_t actual_offset = type_struct_member_offset(type, i);
+
 			// We only care about offsets in std140, std430, etc ...
 			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
 			if (!packing_has_flexible_offset(packing))
 			{
-				uint32_t actual_offset = type_struct_member_offset(type, i);
 				if (actual_offset != offset) // This cannot be the packing we're looking for.
 					return false;
 			}
+			else if ((actual_offset & (alignment - 1)) != 0)
+			{
+				// We still need to verify that alignment rules are observed, even if we have explicit offset.
+				return false;
+			}
 
 			// Verify array stride rules.
 			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
@@ -1408,6 +1498,8 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 
 	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
 		attr.push_back("push_constant");
+	else if (var.storage == StorageClassShaderRecordBufferNV)
+		attr.push_back("shaderRecordNV");
 
 	if (flags.get(DecorationRowMajor))
 		attr.push_back("row_major");
@@ -1453,14 +1545,14 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 
 	// Do not emit set = decoration in regular GLSL output, but
 	// we need to preserve it in Vulkan GLSL mode.
-	if (var.storage != StorageClassPushConstant)
+	if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV)
 	{
 		if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
 			attr.push_back(join("set = ", dec.set));
 	}
 
 	bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
-	bool ssbo_block = var.storage == StorageClassStorageBuffer ||
+	bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
 	                  (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
 	bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
 	bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
@@ -1482,6 +1574,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 	if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
 		can_use_binding = false;
 
+	if (var.storage == StorageClassShaderRecordBufferNV)
+		can_use_binding = false;
+
 	if (can_use_binding && flags.get(DecorationBinding))
 		attr.push_back(join("binding = ", dec.binding));
 
@@ -1517,9 +1612,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 	return res;
 }
 
-string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check_std430)
+string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
 {
-	if (check_std430 && buffer_is_packing_standard(type, BufferPackingStd430))
+	if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
 		return "std430";
 	else if (buffer_is_packing_standard(type, BufferPackingStd140))
 		return "std140";
@@ -1528,7 +1623,8 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check
 		require_extension_internal("GL_EXT_scalar_block_layout");
 		return "scalar";
 	}
-	else if (check_std430 && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
+	else if (support_std430_without_scalar_layout &&
+	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
 	{
 		if (options.es && !options.vulkan_semantics)
 			SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
@@ -1536,7 +1632,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check
 		if (!options.es && !options.vulkan_semantics && options.version < 440)
 			require_extension_internal("GL_ARB_enhanced_layouts");
 
-		set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 		return "std430";
 	}
 	else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
@@ -1550,15 +1646,30 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check
 		if (!options.es && !options.vulkan_semantics && options.version < 440)
 			require_extension_internal("GL_ARB_enhanced_layouts");
 
-		set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 		return "std140";
 	}
 	else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
 	{
-		set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 		require_extension_internal("GL_EXT_scalar_block_layout");
 		return "scalar";
 	}
+	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
+	         buffer_is_packing_standard(type, BufferPackingStd430))
+	{
+		// UBOs can support std430 with GL_EXT_scalar_block_layout.
+		require_extension_internal("GL_EXT_scalar_block_layout");
+		return "std430";
+	}
+	else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
+	         buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
+	{
+		// UBOs can support std430 with GL_EXT_scalar_block_layout.
+		set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
+		require_extension_internal("GL_EXT_scalar_block_layout");
+		return "std430";
+	}
 	else
 	{
 		SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
@@ -1727,7 +1838,7 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
 	auto &type = get<SPIRType>(var.basetype);
 
 	Bitset flags = ir.get_buffer_block_flags(var);
-	bool ssbo = var.storage == StorageClassStorageBuffer ||
+	bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 	bool is_restrict = ssbo && flags.get(DecorationRestrict);
 	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
@@ -1844,6 +1955,14 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
 	{
 		return "hitAttributeNV ";
 	}
+	else if (var.storage == StorageClassCallableDataNV)
+	{
+		return "callableDataNV ";
+	}
+	else if (var.storage == StorageClassIncomingCallableDataNV)
+	{
+		return "callableDataInNV ";
+	}
 
 	return "";
 }
@@ -2015,7 +2134,7 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant)
 	auto name = to_name(constant.self);
 
 	SpecializationConstant wg_x, wg_y, wg_z;
-	uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
 
 	// This specialization constant is implicitly declared by emitting layout() in;
 	if (constant.self == workgroup_size_id)
@@ -2024,7 +2143,8 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant)
 	// These specialization constants are implicitly declared by emitting layout() in;
 	// In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
 	// later can use macro overrides for work group size.
-	bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id;
+	bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
+	                                  ConstantID(constant.self) == wg_z.id;
 
 	if (options.vulkan_semantics && is_workgroup_size_constant)
 	{
@@ -2374,7 +2494,7 @@ void CompilerGLSL::declare_undefined_values()
 
 bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
 {
-	bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable;
+	bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
 
 	if (statically_assigned)
 	{
@@ -2446,44 +2566,47 @@ void CompilerGLSL::emit_resources()
 	// emit specialization constants as actual floats,
 	// spec op expressions will redirect to the constant name.
 	//
-	for (auto &id_ : ir.ids_for_constant_or_type)
 	{
-		auto &id = ir.ids[id_];
-
-		if (id.get_type() == TypeConstant)
+		auto loop_lock = ir.create_loop_hard_lock();
+		for (auto &id_ : ir.ids_for_constant_or_type)
 		{
-			auto &c = id.get<SPIRConstant>();
-
-			bool needs_declaration = c.specialization || c.is_used_as_lut;
+			auto &id = ir.ids[id_];
 
-			if (needs_declaration)
+			if (id.get_type() == TypeConstant)
 			{
-				if (!options.vulkan_semantics && c.specialization)
+				auto &c = id.get<SPIRConstant>();
+
+				bool needs_declaration = c.specialization || c.is_used_as_lut;
+
+				if (needs_declaration)
 				{
-					c.specialization_constant_macro_name =
-					    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
+					if (!options.vulkan_semantics && c.specialization)
+					{
+						c.specialization_constant_macro_name =
+						    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
+					}
+					emit_constant(c);
+					emitted = true;
 				}
-				emit_constant(c);
+			}
+			else if (id.get_type() == TypeConstantOp)
+			{
+				emit_specialization_constant_op(id.get<SPIRConstantOp>());
 				emitted = true;
 			}
-		}
-		else if (id.get_type() == TypeConstantOp)
-		{
-			emit_specialization_constant_op(id.get<SPIRConstantOp>());
-			emitted = true;
-		}
-		else if (id.get_type() == TypeType)
-		{
-			auto &type = id.get<SPIRType>();
-			if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
-			    (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
-			     !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
+			else if (id.get_type() == TypeType)
 			{
-				if (emitted)
-					statement("");
-				emitted = false;
+				auto &type = id.get<SPIRType>();
+				if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
+				    (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
+				     !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
+				{
+					if (emitted)
+						statement("");
+					emitted = false;
 
-				emit_struct(type);
+					emit_struct(type);
+				}
 			}
 		}
 	}
@@ -2500,7 +2623,7 @@ void CompilerGLSL::emit_resources()
 		SpecializationConstant wg_x, wg_y, wg_z;
 		get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
 
-		if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0))
+		if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
 		{
 			SmallVector<string> inputs;
 			build_workgroup_size(inputs, wg_x, wg_y, wg_z);
@@ -2545,7 +2668,8 @@ void CompilerGLSL::emit_resources()
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
 		auto &type = this->get<SPIRType>(var.basetype);
 
-		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
+		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
+		                        type.storage == StorageClassShaderRecordBufferNV;
 		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
 		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 
@@ -2585,8 +2709,9 @@ void CompilerGLSL::emit_resources()
 
 		if (var.storage != StorageClassFunction && type.pointer &&
 		    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
-		     type.storage == StorageClassRayPayloadNV || type.storage == StorageClassHitAttributeNV ||
-		     type.storage == StorageClassIncomingRayPayloadNV) &&
+		     type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV ||
+		     type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV ||
+		     type.storage == StorageClassHitAttributeNV) &&
 		    !is_hidden_variable(var))
 		{
 			emit_uniform(var);
@@ -2646,7 +2771,7 @@ void CompilerGLSL::emit_resources()
 // Returns a string representation of the ID, usable as a function arg.
 // Default is to simply return the expression representation fo the arg ID.
 // Subclasses may override to modify the return value.
-string CompilerGLSL::to_func_call_arg(uint32_t id)
+string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
 {
 	// Make sure that we use the name of the original variable, and not the parameter alias.
 	uint32_t name_id = id;
@@ -2667,8 +2792,8 @@ void CompilerGLSL::handle_invalid_expression(uint32_t id)
 // Converts the format of the current expression from packed to unpacked,
 // by wrapping the expression in a constructor of the appropriate type.
 // GLSL does not support packed formats, so simply return the expression.
-// Subclasses that do will override
-string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t)
+// Subclasses that do will override.
+string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
 {
 	return expr_str;
 }
@@ -2762,13 +2887,22 @@ string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std
 
 string CompilerGLSL::address_of_expression(const std::string &expr)
 {
-	// If this expression starts with a dereference operator ('*'), then
-	// just return the part after the operator.
-	// TODO: Strip parens if unnecessary?
-	if (expr.front() == '*')
+	if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
+	{
+		// If we have an expression which looks like (*foo), taking the address of it is the same as stripping
+		// the first two characters and the last character. We might have to enclose the result.
+		// This doesn't work for cases like (*foo + 10),
+		// but that is an r-value expression which we cannot take the address of anyway.
+		return enclose_expression(expr.substr(2, expr.size() - 3));
+	}
+	else if (expr.front() == '*')
+	{
+		// If this expression starts with a dereference operator ('*'), then
+		// just return the part after the operator.
 		return expr.substr(1);
+	}
 	else
-		return join('&', expr);
+		return join('&', enclose_expression(expr));
 }
 
 // Just like to_expression except that we enclose the expression inside parentheses if needed.
@@ -2777,14 +2911,30 @@ string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expressio
 	return enclose_expression(to_expression(id, register_expression_read));
 }
 
+// Used explicitly to read a row-major matrix expression in unpacked form, without applying any transpose:
+// passes true as the final unpack_expression_type() argument. need_transpose must be forced to false.
+string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
+{
+	return unpack_expression_type(to_expression(id), expression_type(id),
+	                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
+	                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
+}
+
 string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
 {
 	// If we need to transpose, it will also take care of unpacking rules.
 	auto *e = maybe_get<SPIRExpression>(id);
 	bool need_transpose = e && e->need_transpose;
-	if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked))
-		return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
-		                              get_extended_decoration(id, SPIRVCrossDecorationPackedType));
+	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
+	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+
+	if (!need_transpose && (is_remapped || is_packed))
+	{
+		return unpack_expression_type(to_expression(id, register_expression_read),
+		                              get_pointee_type(expression_type_id(id)),
+		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
+		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
+	}
 	else
 		return to_expression(id, register_expression_read);
 }
@@ -2794,9 +2944,14 @@ string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_
 	// If we need to transpose, it will also take care of unpacking rules.
 	auto *e = maybe_get<SPIRExpression>(id);
 	bool need_transpose = e && e->need_transpose;
-	if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked))
+	bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
+	bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+	if (!need_transpose && (is_remapped || is_packed))
+	{
 		return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
-		                              get_extended_decoration(id, SPIRVCrossDecorationPackedType));
+		                              get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
+		                              has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
+	}
 	else
 		return to_enclosed_expression(id, register_expression_read);
 }
@@ -2831,12 +2986,55 @@ string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_e
 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
 {
 	auto expr = to_enclosed_expression(id);
-	if (has_extended_decoration(id, SPIRVCrossDecorationPacked))
+	if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
 		return join(expr, "[", index, "]");
 	else
 		return join(expr, ".", index_to_swizzle(index));
 }
 
+string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
+{
+	uint32_t size = to_array_size_literal(type); // Element count of this array dimension, as a literal.
+	auto &parent = get<SPIRType>(type.parent_type); // Type of one element of this dimension.
+	string expr = "{ ";
+
+	for (uint32_t i = 0; i < size; i++)
+	{
+		auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); // base_expr[i]
+		if (parent.array.empty()) // Element is not itself an array: emit the indexed expression as-is.
+			expr += subexpr;
+		else // Element is still an array: recurse to build a nested brace list for it.
+			expr += to_rerolled_array_expression(subexpr, parent);
+
+		if (i + 1 < size) // Separator between elements; none after the last one.
+			expr += ", ";
+	}
+
+	expr += " }";
+	return expr;
+}
+
+string CompilerGLSL::to_composite_constructor_expression(uint32_t id)
+{
+	auto &type = expression_type(id);
+	if (!backend.array_is_value_type && !type.array.empty())
+	{
+		// Arrays are not value types on this backend, so we need to "re-roll" an array initializer from a temporary.
+		// We cannot simply pass the array directly, since it decays to a pointer and cannot
+		// participate in a struct initializer. For example:
+		//   float arr[2] = { 1.0, 2.0 };
+		//   Foo foo = { arr }; must be transformed to
+		//   Foo foo = { { arr[0], arr[1] } };
+		// The array sizes cannot be deduced from specialization constants since we cannot use any loops.
+
+		// We're only triggering one read of the array expression, but this is fine since arrays have to be declared
+		// as temporaries anyway.
+		return to_rerolled_array_expression(to_enclosed_expression(id), type);
+	}
+	else // Non-array expression, or arrays are value types: use the unpacked expression.
+		return to_unpacked_expression(id);
+}
+
 string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
 {
 	auto itr = invalid_expressions.find(id);
@@ -2874,8 +3072,12 @@ string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
 			return to_enclosed_expression(e.base_expression) + e.expression;
 		else if (e.need_transpose)
 		{
-			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPacked);
-			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed);
+			// This should not be reached for access chains, since we always deal explicitly with transpose state
+			// when consuming an access chain expression.
+			uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
+			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
+			                                is_packed);
 		}
 		else
 		{
@@ -3675,15 +3877,6 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 		if (splat)
 		{
 			res += convert_to_string(c.scalar(vector, 0));
-			if (is_legacy())
-			{
-				// Fake unsigned constant literals with signed ones if possible.
-				// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
-				if (c.scalar_i16(vector, 0) < 0)
-					SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
-			}
-			else
-				res += backend.uint16_t_literal_suffix;
 		}
 		else
 		{
@@ -3693,17 +3886,19 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 					res += to_name(c.specialization_constant_id(vector, i));
 				else
 				{
-					res += convert_to_string(c.scalar(vector, i));
-					if (is_legacy())
+					if (*backend.uint16_t_literal_suffix)
 					{
-						// Fake unsigned constant literals with signed ones if possible.
-						// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
-						if (c.scalar_i16(vector, i) < 0)
-							SPIRV_CROSS_THROW(
-							    "Tried to convert uint literal into int, but this made the literal negative.");
+						res += convert_to_string(c.scalar_u16(vector, i));
+						res += backend.uint16_t_literal_suffix;
 					}
 					else
-						res += backend.uint16_t_literal_suffix;
+					{
+						// If backend doesn't have a literal suffix, we need to value cast.
+						res += type_to_glsl(scalar_type);
+						res += "(";
+						res += convert_to_string(c.scalar_u16(vector, i));
+						res += ")";
+					}
 				}
 
 				if (i + 1 < c.vector_size())
@@ -3716,7 +3911,6 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 		if (splat)
 		{
 			res += convert_to_string(c.scalar_i16(vector, 0));
-			res += backend.int16_t_literal_suffix;
 		}
 		else
 		{
@@ -3726,9 +3920,21 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 					res += to_name(c.specialization_constant_id(vector, i));
 				else
 				{
-					res += convert_to_string(c.scalar_i16(vector, i));
-					res += backend.int16_t_literal_suffix;
+					if (*backend.int16_t_literal_suffix)
+					{
+						res += convert_to_string(c.scalar_i16(vector, i));
+						res += backend.int16_t_literal_suffix;
+					}
+					else
+					{
+						// If backend doesn't have a literal suffix, we need to value cast.
+						res += type_to_glsl(scalar_type);
+						res += "(";
+						res += convert_to_string(c.scalar_i16(vector, i));
+						res += ")";
+					}
 				}
+
 				if (i + 1 < c.vector_size())
 					res += ", ";
 			}
@@ -3883,9 +4089,14 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
 	}
 }
 
-bool CompilerGLSL::expression_is_forwarded(uint32_t id)
+bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
+{
+	return forwarded_temporaries.count(id) != 0;
+}
+
+bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
 {
-	return forwarded_temporaries.find(id) != end(forwarded_temporaries);
+	return suppressed_usage_tracking.count(id) != 0;
 }
 
 SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
@@ -3895,8 +4106,9 @@ SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id,
 	{
 		// Just forward it without temporary.
 		// If the forward is trivial, we do not force flushing to temporary for this expression.
-		if (!suppress_usage_tracking)
-			forwarded_temporaries.insert(result_id);
+		forwarded_temporaries.insert(result_id);
+		if (suppress_usage_tracking)
+			suppressed_usage_tracking.insert(result_id);
 
 		return set<SPIRExpression>(result_id, rhs, result_type, true);
 	}
@@ -3947,8 +4159,18 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_
 }
 
 void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
-                                           const char *op)
+                                           const char *op, bool negate, SPIRType::BaseType expected_type)
 {
+	auto &type0 = expression_type(op0);
+	auto &type1 = expression_type(op1);
+
+	SPIRType target_type0 = type0;
+	SPIRType target_type1 = type1;
+	target_type0.basetype = expected_type;
+	target_type1.basetype = expected_type;
+	target_type0.vecsize = 1;
+	target_type1.vecsize = 1;
+
 	auto &type = get<SPIRType>(result_type);
 	auto expr = type_to_glsl_constructor(type);
 	expr += '(';
@@ -3956,11 +4178,25 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result
 	{
 		// Make sure to call to_expression multiple times to ensure
 		// that these expressions are properly flushed to temporaries if needed.
-		expr += to_extract_component_expression(op0, i);
+		if (negate)
+			expr += "!(";
+
+		if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
+			expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
+		else
+			expr += to_extract_component_expression(op0, i);
+
 		expr += ' ';
 		expr += op;
 		expr += ' ';
-		expr += to_extract_component_expression(op1, i);
+
+		if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
+			expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
+		else
+			expr += to_extract_component_expression(op1, i);
+
+		if (negate)
+			expr += ")";
 
 		if (i + 1 < type.vecsize)
 			expr += ", ";
@@ -4082,6 +4318,58 @@ void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result
 	inherit_expression_dependencies(result_id, op0);
 }
 
+// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
+// and different vector sizes all at once, so op0, op1/op2 and the result are each converted separately here.
+void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+                                                   uint32_t op2, const char *op,
+                                                   SPIRType::BaseType expected_result_type,
+                                                   SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
+                                                   SPIRType::BaseType input_type2)
+{
+	auto &out_type = get<SPIRType>(result_type);
+	auto expected_type = out_type; // Scratch copy of the result type, re-targeted for each conversion below.
+	expected_type.basetype = input_type0;
+
+	string cast_op0 = // Bitcast op0 only when its base type differs from input_type0.
+	    expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
+
+	auto op1_expr = to_unpacked_expression(op1);
+	auto op2_expr = to_unpacked_expression(op2);
+
+	// Use value casts for op1 and op2 instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
+	expected_type.basetype = input_type1;
+	expected_type.vecsize = 1; // op1 is converted through a scalar constructor.
+	string cast_op1 = expression_type(op1).basetype != input_type1 ?
+	                      join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
+	                      op1_expr;
+
+	expected_type.basetype = input_type2;
+	expected_type.vecsize = 1; // op2 is converted through a scalar constructor as well.
+	string cast_op2 = expression_type(op2).basetype != input_type2 ?
+	                      join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
+	                      op2_expr;
+
+	string expr;
+	if (out_type.basetype != expected_result_type)
+	{
+		expected_type.vecsize = out_type.vecsize; // Restore the result's vector size after the scalar casts above.
+		expected_type.basetype = expected_result_type;
+		expr = bitcast_glsl_op(out_type, expected_type); // Cast operator wrapped around the call (presumably expected_type -> out_type).
+		expr += '(';
+		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
+		expr += ')';
+	}
+	else
+	{
+		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
+	}
+
+	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
+	inherit_expression_dependencies(result_id, op0);
+	inherit_expression_dependencies(result_id, op1);
+	inherit_expression_dependencies(result_id, op2);
+}
+
 void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                              uint32_t op2, const char *op, SPIRType::BaseType input_type)
 {
@@ -4170,6 +4458,44 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result
 	inherit_expression_dependencies(result_id, op3);
 }
 
+void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+                                           uint32_t op2, uint32_t op3, const char *op,
+                                           SPIRType::BaseType offset_count_type)
+{
+	// Only need to cast the offset/count arguments (op2 and op3). Types of base/insert must be same as result type,
+	// and bitfieldInsert is sign invariant, so op0 and op1 are passed through without a cast.
+	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
+
+	auto op0_expr = to_unpacked_expression(op0);
+	auto op1_expr = to_unpacked_expression(op1);
+	auto op2_expr = to_unpacked_expression(op2);
+	auto op3_expr = to_unpacked_expression(op3);
+
+	SPIRType target_type; // Scalar type of offset_count_type, used for the value casts below.
+	target_type.vecsize = 1;
+	target_type.basetype = offset_count_type;
+
+	if (expression_type(op2).basetype != offset_count_type)
+	{
+		// Value-cast op2 (third call argument) here. Input might be 16-bit. GLSL requires int.
+		op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
+	}
+
+	if (expression_type(op3).basetype != offset_count_type)
+	{
+		// Value-cast op3 (fourth call argument) here. Input might be 16-bit. GLSL requires int.
+		op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
+	}
+
+	emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
+	        forward);
+
+	inherit_expression_dependencies(result_id, op0);
+	inherit_expression_dependencies(result_id, op1);
+	inherit_expression_dependencies(result_id, op2);
+	inherit_expression_dependencies(result_id, op3);
+}
+
 // EXT_shader_texture_lod only concerns fragment shaders so lod tex functions
 // are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex
 // functions in vertex shaders so we revert those back to plain calls when
@@ -4401,7 +4727,7 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left,
 	}
 
 	string mix_op;
-	bool has_boolean_mix = backend.boolean_mix_support &&
+	bool has_boolean_mix = *backend.boolean_mix_function &&
 	                       ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
 	bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
 
@@ -4431,11 +4757,13 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left,
 		inherit_expression_dependencies(id, right);
 		inherit_expression_dependencies(id, lerp);
 	}
+	else if (lerptype.basetype == SPIRType::Boolean)
+		emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
 	else
 		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
 }
 
-string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id)
+string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
 {
 	// Keep track of the array indices we have used to load the image.
 	// We'll need to use the same array index into the combined image sampler array.
@@ -4457,18 +4785,18 @@ string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_
 		samp_id = samp->self;
 
 	auto image_itr = find_if(begin(args), end(args),
-	                         [image_id](const SPIRFunction::Parameter &param) { return param.id == image_id; });
+	                         [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
 
 	auto sampler_itr = find_if(begin(args), end(args),
-	                           [samp_id](const SPIRFunction::Parameter &param) { return param.id == samp_id; });
+	                           [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
 
 	if (image_itr != end(args) || sampler_itr != end(args))
 	{
 		// If any parameter originates from a parameter, we will find it in our argument list.
 		bool global_image = image_itr == end(args);
 		bool global_sampler = sampler_itr == end(args);
-		uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args));
-		uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args));
+		VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
+		VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
 
 		auto &combined = current_function->combined_parameters;
 		auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
@@ -4509,15 +4837,16 @@ void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i
 	{
 		emit_binary_func_op(result_type, result_id, image_id, samp_id,
 		                    type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
-
-		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
-		forwarded_temporaries.erase(result_id);
 	}
 	else
 	{
 		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
 		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
 	}
+
+	// Make sure to suppress usage tracking and any expression invalidation.
+	// It is illegal to create temporaries of opaque types.
+	forwarded_temporaries.erase(result_id);
 }
 
 static inline bool image_opcode_is_sample_no_dref(Op op)
@@ -4547,13 +4876,41 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 {
 	auto *ops = stream(i);
 	auto op = static_cast<Op>(i.op);
-	uint32_t length = i.length;
 
 	SmallVector<uint32_t> inherited_expressions;
 
-	uint32_t result_type = ops[0];
+	uint32_t result_type_id = ops[0];
 	uint32_t id = ops[1];
-	uint32_t img = ops[2];
+
+	bool forward = false;
+	string expr = to_texture_op(i, &forward, inherited_expressions);
+	emit_op(result_type_id, id, expr, forward);
+	for (auto &inherit : inherited_expressions)
+		inherit_expression_dependencies(id, inherit);
+
+	switch (op)
+	{
+	case OpImageSampleDrefImplicitLod:
+	case OpImageSampleImplicitLod:
+	case OpImageSampleProjImplicitLod:
+	case OpImageSampleProjDrefImplicitLod:
+		register_control_dependent_expression(id);
+		break;
+
+	default:
+		break;
+	}
+}
+
+std::string CompilerGLSL::to_texture_op(const Instruction &i, bool *forward,
+                                        SmallVector<uint32_t> &inherited_expressions)
+{
+	auto *ops = stream(i);
+	auto op = static_cast<Op>(i.op);
+	uint32_t length = i.length;
+
+	uint32_t result_type_id = ops[0];
+	VariableID img = ops[2];
 	uint32_t coord = ops[3];
 	uint32_t dref = 0;
 	uint32_t comp = 0;
@@ -4562,8 +4919,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	bool fetch = false;
 	const uint32_t *opt = nullptr;
 
+	auto &result_type = get<SPIRType>(result_type_id);
+
 	inherited_expressions.push_back(coord);
 
+	// Make sure non-uniform decoration is back-propagated to where it needs to be.
+	if (has_decoration(img, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(img);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@@ -4658,6 +5021,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	uint32_t offset = 0;
 	uint32_t coffsets = 0;
 	uint32_t sample = 0;
+	uint32_t minlod = 0;
 	uint32_t flags = 0;
 
 	if (length)
@@ -4683,14 +5047,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	test(offset, ImageOperandsOffsetMask);
 	test(coffsets, ImageOperandsConstOffsetsMask);
 	test(sample, ImageOperandsSampleMask);
+	test(minlod, ImageOperandsMinLodMask);
 
 	string expr;
-	bool forward = false;
 	expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset),
-	                         (!!grad_x || !!grad_y), !!dref, lod);
+	                         (!!grad_x || !!grad_y), !!dref, lod, minlod);
 	expr += "(";
 	expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod,
-	                         coffset, offset, bias, comp, sample, &forward);
+	                         coffset, offset, bias, comp, sample, minlod, forward);
 	expr += ")";
 
 	// texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
@@ -4703,7 +5067,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	{
 		bool image_is_depth = false;
 		const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
-		uint32_t image_id = combined ? combined->image : img;
+		VariableID image_id = combined ? combined->image : img;
 
 		if (combined && image_is_comparison(imgtype, combined->image))
 			image_is_depth = true;
@@ -4718,29 +5082,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 			image_is_depth = true;
 
 		if (image_is_depth)
-			expr = remap_swizzle(get<SPIRType>(result_type), 1, expr);
+			expr = remap_swizzle(result_type, 1, expr);
+	}
+
+	if (!backend.support_small_type_sampling_result && result_type.width < 32)
+	{
+		// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
+		// Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
+		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
 	}
 
 	// Deals with reads from MSL. We might need to downconvert to fewer components.
 	if (op == OpImageRead)
-		expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
+		expr = remap_swizzle(result_type, 4, expr);
 
-	emit_op(result_type, id, expr, forward);
-	for (auto &inherit : inherited_expressions)
-		inherit_expression_dependencies(id, inherit);
-
-	switch (op)
-	{
-	case OpImageSampleDrefImplicitLod:
-	case OpImageSampleImplicitLod:
-	case OpImageSampleProjImplicitLod:
-	case OpImageSampleProjDrefImplicitLod:
-		register_control_dependent_expression(id);
-		break;
-
-	default:
-		break;
-	}
+	return expr;
 }
 
 bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
@@ -4753,10 +5109,13 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
 
 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
 // For some subclasses, the function is a method on the specified image.
-string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+string CompilerGLSL::to_function_name(VariableID tex, const SPIRType &imgtype, bool is_fetch, bool is_gather,
                                       bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool,
-                                      uint32_t lod)
+                                      uint32_t lod, uint32_t minlod)
 {
+	if (minlod != 0)
+		SPIRV_CROSS_THROW("Sparse texturing not yet supported.");
+
 	string fname;
 
 	// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
@@ -4812,10 +5171,19 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
 		{
 			if (options.vulkan_semantics)
 			{
-				// Newer glslang supports this extension to deal with texture2D as argument to texture functions.
 				if (dummy_sampler_id)
-					SPIRV_CROSS_THROW("Vulkan GLSL should not have a dummy sampler for combining.");
-				require_extension_internal("GL_EXT_samplerless_texture_functions");
+				{
+					// Don't need to consider Shadow state since the dummy sampler is always non-shadow.
+					auto sampled_type = type;
+					sampled_type.basetype = SPIRType::SampledImage;
+					return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
+					            to_expression(dummy_sampler_id), ")");
+				}
+				else
+				{
+					// Newer glslang supports this extension to deal with texture2D as argument to texture functions.
+					require_extension_internal("GL_EXT_samplerless_texture_functions");
+				}
 			}
 			else
 			{
@@ -4832,10 +5200,11 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
 }
 
 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
-string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+string CompilerGLSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
                                       bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
                                       uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset,
-                                      uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward)
+                                      uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/,
+                                      bool *p_forward)
 {
 	string farg_str;
 	if (is_fetch)
@@ -5112,7 +5481,6 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
 	case GLSLstd450ModfStruct:
 	{
-		forced_temporaries.insert(id);
 		auto &type = get<SPIRType>(result_type);
 		emit_uninitialized_temporary_expression(result_type, id);
 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
@@ -5252,7 +5620,6 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
 	case GLSLstd450FrexpStruct:
 	{
-		forced_temporaries.insert(id);
 		auto &type = get<SPIRType>(result_type);
 		emit_uninitialized_temporary_expression(result_type, id);
 		statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
@@ -5261,8 +5628,28 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	}
 
 	case GLSLstd450Ldexp:
-		emit_binary_func_op(result_type, id, args[0], args[1], "ldexp");
+	{
+		bool forward = should_forward(args[0]) && should_forward(args[1]);
+
+		auto op0 = to_unpacked_expression(args[0]);
+		auto op1 = to_unpacked_expression(args[1]);
+		auto &op1_type = expression_type(args[1]);
+		if (op1_type.basetype != SPIRType::Int)
+		{
+			// Need a value cast here.
+			auto target_type = op1_type;
+			target_type.basetype = SPIRType::Int;
+			op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
+		}
+
+		auto expr = join("ldexp(", op0, ", ", op1, ")");
+
+		emit_op(result_type, id, expr, forward);
+		inherit_expression_dependencies(id, args[0]);
+		inherit_expression_dependencies(id, args[1]);
 		break;
+	}
+
 	case GLSLstd450PackSnorm4x8:
 		emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
 		break;
@@ -5326,7 +5713,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
 	// Bit-fiddling
 	case GLSLstd450FindILsb:
-		emit_unary_func_op(result_type, id, args[0], "findLSB");
+		// findLSB always returns int.
+		emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
 		break;
 
 	case GLSLstd450FindSMsb:
@@ -5808,13 +6196,38 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
 	// Floating <-> Integer special casts. Just have to enumerate all cases. :(
 	// 16-bit, 32-bit and 64-bit floats.
 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
+	{
+		if (is_legacy_es())
+			SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
+		else if (!options.es && options.version < 330)
+			require_extension_internal("GL_ARB_shader_bit_encoding");
 		return "floatBitsToUint";
+	}
 	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
+	{
+		if (is_legacy_es())
+			SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
+		else if (!options.es && options.version < 330)
+			require_extension_internal("GL_ARB_shader_bit_encoding");
 		return "floatBitsToInt";
+	}
 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
+	{
+		if (is_legacy_es())
+			SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
+		else if (!options.es && options.version < 330)
+			require_extension_internal("GL_ARB_shader_bit_encoding");
 		return "uintBitsToFloat";
+	}
 	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
+	{
+		if (is_legacy_es())
+			SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
+		else if (!options.es && options.version < 330)
+			require_extension_internal("GL_ARB_shader_bit_encoding");
 		return "intBitsToFloat";
+	}
+
 	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
 		return "doubleBitsToInt64";
 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
@@ -6114,6 +6527,43 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 	case BuiltInIncomingRayFlagsNV:
 		return "gl_IncomingRayFlagsNV";
 
+	case BuiltInBaryCoordNV:
+	{
+		if (options.es && options.version < 320)
+			SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
+		else if (!options.es && options.version < 450)
+			SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
+		require_extension_internal("GL_NV_fragment_shader_barycentric");
+		return "gl_BaryCoordNV";
+	}
+
+	case BuiltInBaryCoordNoPerspNV:
+	{
+		if (options.es && options.version < 320)
+			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
+		else if (!options.es && options.version < 450)
+			SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
+		require_extension_internal("GL_NV_fragment_shader_barycentric");
+		return "gl_BaryCoordNoPerspNV";
+	}
+
+	case BuiltInFragStencilRefEXT:
+	{
+		if (!options.es)
+		{
+			require_extension_internal("GL_ARB_shader_stencil_export");
+			return "gl_FragStencilRefARB";
+		}
+		else
+			SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
+	}
+
+	case BuiltInDeviceIndex:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
+		require_extension_internal("GL_EXT_device_group");
+		return "gl_DeviceIndex";
+
 	default:
 		return join("gl_BuiltIn_", convert_to_string(builtin));
 	}
@@ -6147,7 +6597,16 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
 
 	if (!chain_only)
+	{
+		// We handle transpose explicitly, so don't resolve that here.
+		auto *e = maybe_get<SPIRExpression>(base);
+		bool old_transpose = e && e->need_transpose;
+		if (e)
+			e->need_transpose = false;
 		expr = to_enclosed_expression(base, register_expression_read);
+		if (e)
+			e->need_transpose = old_transpose;
+	}
 
 	// Start traversing type hierarchy at the proper non-pointer types,
 	// but keep type_id referencing the original pointer for use below.
@@ -6171,8 +6630,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 
 	bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
 	bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
-	bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPacked);
-	uint32_t packed_type = get_extended_decoration(base, SPIRVCrossDecorationPackedType);
+	bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
+	uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
 	bool is_invariant = has_decoration(base, DecorationInvariant);
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;
@@ -6344,9 +6803,6 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			BuiltIn builtin;
 			if (is_member_builtin(*type, index, &builtin))
 			{
-				// FIXME: We rely here on OpName on gl_in/gl_out to make this work properly.
-				// To make this properly work by omitting all OpName opcodes,
-				// we need to infer gl_in or gl_out based on the builtin, and stage.
 				if (access_chain_is_arrayed)
 				{
 					expr += ".";
@@ -6368,11 +6824,11 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			if (has_member_decoration(type->self, index, DecorationInvariant))
 				is_invariant = true;
 
-			is_packed = member_is_packed_type(*type, index);
-			if (is_packed)
-				packed_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPackedType);
+			is_packed = member_is_packed_physical_type(*type, index);
+			if (member_is_remapped_physical_type(*type, index))
+				physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
 			else
-				packed_type = 0;
+				physical_type = 0;
 
 			row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
 			type = &get<SPIRType>(type->member_types[index]);
@@ -6380,13 +6836,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		// Matrix -> Vector
 		else if (type->columns > 1)
 		{
-			if (row_major_matrix_needs_conversion)
-			{
-				expr = convert_row_major_matrix(expr, *type, is_packed);
-				row_major_matrix_needs_conversion = false;
-				is_packed = false;
-				packed_type = 0;
-			}
+			// If we have a row-major matrix here, we need to defer any transpose in case this access chain
+			// is used to store a column. We can resolve it right here and now if we access a scalar directly,
+			// by flipping indexing order of the matrix.
 
 			expr += "[";
 			if (index_is_literal)
@@ -6401,16 +6853,36 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		// Vector -> Scalar
 		else if (type->vecsize > 1)
 		{
-			if (index_is_literal && !is_packed)
+			string deferred_index;
+			if (row_major_matrix_needs_conversion)
+			{
+				// Flip indexing order.
+				auto column_index = expr.find_last_of('[');
+				if (column_index != string::npos)
+				{
+					deferred_index = expr.substr(column_index);
+					expr.resize(column_index);
+				}
+			}
+
+			if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion)
 			{
 				expr += ".";
 				expr += index_to_swizzle(index);
 			}
-			else if (ir.ids[index].get_type() == TypeConstant && !is_packed)
+			else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
 			{
 				auto &c = get<SPIRConstant>(index);
-				expr += ".";
-				expr += index_to_swizzle(c.scalar());
+				if (c.specialization)
+				{
+					// If the index is a spec constant, we cannot turn extract into a swizzle.
+					expr += join("[", to_expression(index), "]");
+				}
+				else
+				{
+					expr += ".";
+					expr += index_to_swizzle(c.scalar());
+				}
 			}
 			else if (index_is_literal)
 			{
@@ -6424,8 +6896,11 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				expr += "]";
 			}
 
+			expr += deferred_index;
+			row_major_matrix_needs_conversion = false;
+
 			is_packed = false;
-			packed_type = 0;
+			physical_type = 0;
 			type_id = type->parent_type;
 			type = &get<SPIRType>(type_id);
 		}
@@ -6445,7 +6920,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		meta->need_transpose = row_major_matrix_needs_conversion;
 		meta->storage_is_packed = is_packed;
 		meta->storage_is_invariant = is_invariant;
-		meta->storage_packed_type = packed_type;
+		meta->storage_physical_type = physical_type;
 	}
 
 	return expr;
@@ -6587,7 +7062,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 
 		// Cannot forward transpositions, so resolve them here.
 		if (need_transpose)
-			expr += convert_row_major_matrix(tmp, member_type, false);
+			expr += convert_row_major_matrix(tmp, member_type, 0, false);
 		else
 			expr += tmp;
 	}
@@ -6903,7 +7378,7 @@ bool CompilerGLSL::should_dereference(uint32_t id)
 	return true;
 }
 
-bool CompilerGLSL::should_forward(uint32_t id)
+bool CompilerGLSL::should_forward(uint32_t id) const
 {
 	// If id is a variable we will try to forward it regardless of force_temporary check below
 	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
@@ -6922,6 +7397,12 @@ bool CompilerGLSL::should_forward(uint32_t id)
 	return false;
 }
 
+bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
+{
+	// Used only by opcodes which don't do any real "work"; they just swizzle data in some fashion.
+	return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
+}
+
 void CompilerGLSL::track_expression_read(uint32_t id)
 {
 	switch (ir.ids[id].get_type())
@@ -6948,7 +7429,7 @@ void CompilerGLSL::track_expression_read(uint32_t id)
 
 	// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
 	// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
-	if (expression_is_forwarded(id))
+	if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
 	{
 		auto &v = expression_usage_counts[id];
 		v++;
@@ -7019,18 +7500,23 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
 	return expr;
 }
 
+void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
+{
+	if (var.allocate_temporary_copy)
+	{
+		auto &type = get<SPIRType>(var.basetype);
+		auto &flags = get_decoration_bitset(var.self);
+		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
+	}
+}
+
 void CompilerGLSL::flush_variable_declaration(uint32_t id)
 {
 	auto *var = maybe_get<SPIRVariable>(id);
 	if (var && var->deferred_declaration)
 	{
 		statement(variable_decl_function_local(*var), ";");
-		if (var->allocate_temporary_copy)
-		{
-			auto &type = get<SPIRType>(var->basetype);
-			auto &flags = ir.meta[id].decoration.decoration_flags;
-			statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), ";");
-		}
+		emit_variable_temporary_copies(*var);
 		var->deferred_declaration = false;
 	}
 }
@@ -7140,7 +7626,7 @@ bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
 
 string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
 {
-	uint32_t base = 0;
+	ID base = 0;
 	string op;
 	string subop;
 
@@ -7198,10 +7684,10 @@ string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32
 
 			if (i)
 				op += ", ";
-			subop = to_expression(elems[i]);
+			subop = to_composite_constructor_expression(elems[i]);
 		}
 
-		base = e ? e->base_expression : 0;
+		base = e ? e->base_expression : ID(0);
 	}
 
 	if (swizzle_optimization)
@@ -7285,14 +7771,19 @@ void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
 
 void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
 {
-	if (forwarded_temporaries.count(expr.self))
+	// Allow trivially forwarded expressions like OpLoad or trivial shuffles,
+	// these will be marked as having suppressed usage tracking.
+	// Our only concern is to make sure arithmetic operations are done in similar ways.
+	if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
+	    forced_invariant_temporaries.count(expr.self) == 0)
 	{
 		forced_temporaries.insert(expr.self);
+		forced_invariant_temporaries.insert(expr.self);
 		force_recompile();
-	}
 
-	for (auto &dependent : expr.expression_dependencies)
-		disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
+		for (auto &dependent : expr.expression_dependencies)
+			disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
+	}
 }
 
 void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
@@ -7357,6 +7848,10 @@ uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &inst
 	case OpSLessThanEqual:
 	case OpSGreaterThan:
 	case OpSGreaterThanEqual:
+	case OpULessThan:
+	case OpULessThanEqual:
+	case OpUGreaterThan:
+	case OpUGreaterThanEqual:
 		return expression_type(ops[2]).width;
 
 	default:
@@ -7442,19 +7937,39 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		bool old_need_transpose = false;
 
 		auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
-		if (ptr_expression && ptr_expression->need_transpose)
+
+		if (forward)
 		{
-			old_need_transpose = true;
-			ptr_expression->need_transpose = false;
-			need_transpose = true;
+			// If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
+			// taking the expression.
+			if (ptr_expression && ptr_expression->need_transpose)
+			{
+				old_need_transpose = true;
+				ptr_expression->need_transpose = false;
+				need_transpose = true;
+			}
+			else if (is_non_native_row_major_matrix(ptr))
+				need_transpose = true;
 		}
-		else if (is_non_native_row_major_matrix(ptr))
-			need_transpose = true;
 
 		// If we are forwarding this load,
 		// don't register the read to access chain here, defer that to when we actually use the expression,
 		// using the add_implied_read_expression mechanism.
-		auto expr = to_dereferenced_expression(ptr, !forward);
+		string expr;
+
+		bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
+		bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
+		if (forward || (!is_packed && !is_remapped))
+		{
+			// For the simple case, we do not need to deal with repacking.
+			expr = to_dereferenced_expression(ptr, false);
+		}
+		else
+		{
+			// If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
+			// storing the expression to a temporary.
+			expr = to_unpacked_expression(ptr);
+		}
 
 		// We might need to bitcast in order to load from a builtin.
 		bitcast_from_builtin_load(ptr, expr, get<SPIRType>(result_type));
@@ -7465,10 +7980,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		unroll_array_from_complex_load(id, ptr, expr);
 
 		auto &type = get<SPIRType>(result_type);
-		if (has_decoration(id, DecorationNonUniformEXT))
+		// Shouldn't need to check for ID, but current glslang codegen requires it in some cases
+		// when loading Image/Sampler descriptors. It does not hurt to check ID as well.
+		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
+		{
+			propagate_nonuniform_qualifier(ptr);
 			convert_non_uniform_expression(type, expr);
+		}
 
-		if (ptr_expression)
+		if (forward && ptr_expression)
 			ptr_expression->need_transpose = old_need_transpose;
 
 		// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
@@ -7484,7 +8004,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			// it is an array, and our backend does not support arrays as value types.
 			// Emit the temporary, and copy it explicitly.
 			e = &emit_uninitialized_temporary_expression(result_type, id);
-			emit_array_copy(to_expression(id), ptr);
+			emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_backing_variable_storage(ptr));
 		}
 		else
 			e = &emit_op(result_type, id, expr, forward, !usage_tracking);
@@ -7492,12 +8012,22 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		e->need_transpose = need_transpose;
 		register_read(id, ptr, forward);
 
-		// Pass through whether the result is of a packed type.
-		if (has_extended_decoration(ptr, SPIRVCrossDecorationPacked))
+		if (forward)
 		{
-			set_extended_decoration(id, SPIRVCrossDecorationPacked);
-			set_extended_decoration(id, SPIRVCrossDecorationPackedType,
-			                        get_extended_decoration(ptr, SPIRVCrossDecorationPackedType));
+			// Pass through whether the result is of a packed type and the physical type ID.
+			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
+				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+			if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
+			{
+				set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
+				                        get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
+			}
+		}
+		else
+		{
+			// This might have been set on an earlier compilation iteration, force it to be unset.
+			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+			unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
 		}
 
 		inherit_expression_dependencies(id, ptr);
@@ -7523,24 +8053,36 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
 
 		auto *backing_variable = maybe_get_backing_variable(ops[2]);
-		expr.loaded_from = backing_variable ? backing_variable->self : ops[2];
+		expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
 		expr.need_transpose = meta.need_transpose;
 		expr.access_chain = true;
 
 		// Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
 		if (meta.storage_is_packed)
-			set_extended_decoration(ops[1], SPIRVCrossDecorationPacked);
-		if (meta.storage_packed_type != 0)
-			set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type);
+			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
+		if (meta.storage_physical_type != 0)
+			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
 		if (meta.storage_is_invariant)
 			set_decoration(ops[1], DecorationInvariant);
 
+		// If we have some expression dependencies in our access chain, this access chain is technically a forwarded
+		// temporary which could be subject to invalidation.
+		// Need to assume we're forwarded while calling inherit_expression_dependencies.
+		forwarded_temporaries.insert(ops[1]);
+		// The access chain itself is never forced to a temporary, but its dependencies might be.
+		suppressed_usage_tracking.insert(ops[1]);
+
 		for (uint32_t i = 2; i < length; i++)
 		{
 			inherit_expression_dependencies(ops[1], ops[i]);
 			add_implied_read_expression(expr, ops[i]);
 		}
 
+		// If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
+		// we're not forwarded after all.
+		if (expr.expression_dependencies.empty())
+			forwarded_temporaries.erase(ops[1]);
+
 		break;
 	}
 
@@ -7548,6 +8090,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	{
 		auto *var = maybe_get<SPIRVariable>(ops[0]);
 
+		if (has_decoration(ops[0], DecorationNonUniformEXT))
+			propagate_nonuniform_qualifier(ops[0]);
+
 		if (var && var->statically_assigned)
 			var->static_expression = ops[1];
 		else if (var && var->loop_variable && !var->loop_variable_enable)
@@ -7637,13 +8182,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			if (skip_argument(arg[i]))
 				continue;
 
-			arglist.push_back(to_func_call_arg(arg[i]));
+			arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
 		}
 
 		for (auto &combined : callee.combined_parameters)
 		{
-			uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id];
-			uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id];
+			auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
+			auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
 			arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
 		}
 
@@ -7746,15 +8291,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			forward = false;
 
 		string constructor_op;
-		if (!backend.array_is_value_type && out_type.array.size() > 1)
-		{
-			// We cannot construct array of arrays because we cannot treat the inputs
-			// as value types. Need to declare the array-of-arrays, and copy in elements one by one.
-			emit_uninitialized_temporary_expression(result_type, id);
-			for (uint32_t i = 0; i < length; i++)
-				emit_array_copy(join(to_expression(id), "[", i, "]"), elems[i]);
-		}
-		else if (backend.use_initializer_list && composite)
+		if (backend.use_initializer_list && composite)
 		{
 			// Only use this path if we are building composites.
 			// This path cannot be used for arithmetic.
@@ -7764,14 +8301,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
 				constructor_op += "0";
 			else if (splat)
-				constructor_op += to_expression(elems[0]);
+				constructor_op += to_unpacked_expression(elems[0]);
 			else
 				constructor_op += build_composite_combiner(result_type, elems, length);
 			constructor_op += " }";
 		}
 		else if (swizzle_splat && !composite)
 		{
-			constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_expression(elems[0]));
+			constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
 		}
 		else
 		{
@@ -7779,7 +8316,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			if (type_is_empty(out_type) && !backend.supports_empty_struct)
 				constructor_op += "0";
 			else if (splat)
-				constructor_op += to_expression(elems[0]);
+				constructor_op += to_unpacked_expression(elems[0]);
 			else
 				constructor_op += build_composite_combiner(result_type, elems, length);
 			constructor_op += ")";
@@ -7841,7 +8378,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			allow_base_expression = false;
 
 		// Packed expressions cannot be split up.
-		if (has_extended_decoration(ops[2], SPIRVCrossDecorationPacked))
+		if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked))
+			allow_base_expression = false;
+
+		// Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
+		// into the base expression.
+		if (is_non_native_row_major_matrix(ops[2]))
 			allow_base_expression = false;
 
 		AccessChainMeta meta;
@@ -7864,14 +8406,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			// from expression causing it to be forced to an actual temporary in GLSL.
 			auto expr = access_chain_internal(ops[2], &ops[3], length,
 			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
-			e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
+			e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
 			inherit_expression_dependencies(id, ops[2]);
 			e->base_expression = ops[2];
 		}
 		else
 		{
 			auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
-			e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
+			e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
 			inherit_expression_dependencies(id, ops[2]);
 		}
 
@@ -7880,9 +8422,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// instead of loading everything through an access chain.
 		e->need_transpose = meta.need_transpose;
 		if (meta.storage_is_packed)
-			set_extended_decoration(id, SPIRVCrossDecorationPacked);
-		if (meta.storage_packed_type != 0)
-			set_extended_decoration(id, SPIRVCrossDecorationPackedType, meta.storage_packed_type);
+			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+		if (meta.storage_physical_type != 0)
+			set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
 		if (meta.storage_is_invariant)
 			set_decoration(id, DecorationInvariant);
 
@@ -7930,13 +8472,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t rhs = ops[2];
 		bool pointer = get<SPIRType>(result_type).pointer;
 
-		if (expression_is_lvalue(rhs) && !pointer)
+		auto *chain = maybe_get<SPIRAccessChain>(rhs);
+		if (chain)
+		{
+			// Cannot lower to a SPIRExpression, just copy the object.
+			auto &e = set<SPIRAccessChain>(id, *chain);
+			e.self = id;
+		}
+		else if (expression_is_lvalue(rhs) && !pointer)
 		{
 			// Need a copy.
 			// For pointer types, we copy the pointer itself.
-			statement(declare_temporary(result_type, id), to_expression(rhs), ";");
+			statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
 			set<SPIRExpression>(id, to_name(id), result_type, true);
-			inherit_expression_dependencies(id, rhs);
 		}
 		else
 		{
@@ -7947,7 +8495,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			if (pointer)
 			{
 				auto *var = maybe_get_backing_variable(rhs);
-				e.loaded_from = var ? var->self : 0;
+				e.loaded_from = var ? var->self : ID(0);
+			}
+
+			// If we're copying an access chain, need to inherit the read expressions.
+			auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
+			if (rhs_expr)
+			{
+				e.implied_read_expressions = rhs_expr->implied_read_expressions;
+				e.expression_dependencies = rhs_expr->expression_dependencies;
 			}
 		}
 		break;
@@ -7972,7 +8528,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 				shuffle = true;
 
 		// Cannot use swizzles with packed expressions, force shuffle path.
-		if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPacked))
+		if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
 			shuffle = true;
 
 		string expr;
@@ -7981,7 +8537,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (shuffle)
 		{
 			should_fwd = should_forward(vec0) && should_forward(vec1);
-			trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
+			trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
 
 			// Constructor style and shuffling from two different vectors.
 			SmallVector<string> args;
@@ -7994,7 +8550,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 					// a value we might not need, and bog down codegen.
 					SPIRConstant c;
 					c.constant_type = type0.parent_type;
-					assert(type0.parent_type != 0);
+					assert(type0.parent_type != ID(0));
 					args.push_back(constant_expression(c));
 				}
 				else if (elems[i] >= type0.vecsize)
@@ -8007,7 +8563,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		else
 		{
 			should_fwd = should_forward(vec0);
-			trivial_forward = !expression_is_forwarded(vec0);
+			trivial_forward = should_suppress_usage_tracking(vec0);
 
 			// We only source from first vector, so can use swizzle.
 			// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
@@ -8027,8 +8583,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
 
 		emit_op(result_type, id, expr, should_fwd, trivial_forward);
+
 		inherit_expression_dependencies(id, vec0);
-		inherit_expression_dependencies(id, vec1);
+		if (vec0 != vec1)
+			inherit_expression_dependencies(id, vec1);
 		break;
 	}
 
@@ -8084,18 +8642,56 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (e && e->need_transpose)
 		{
 			e->need_transpose = false;
-			emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
+			string expr;
+
+			if (opcode == OpMatrixTimesVector)
+				expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
+				            enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
+			else
+				expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
+				            to_enclosed_unpacked_expression(ops[2]));
+
+			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
+			emit_op(ops[0], ops[1], expr, forward);
 			e->need_transpose = true;
+			inherit_expression_dependencies(ops[1], ops[2]);
+			inherit_expression_dependencies(ops[1], ops[3]);
 		}
 		else
 			GLSL_BOP(*);
 		break;
 	}
 
+	case OpMatrixTimesMatrix:
+	{
+		auto *a = maybe_get<SPIRExpression>(ops[2]);
+		auto *b = maybe_get<SPIRExpression>(ops[3]);
+
+		// If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
+		// a^T * b^T = (b * a)^T.
+		if (a && b && a->need_transpose && b->need_transpose)
+		{
+			a->need_transpose = false;
+			b->need_transpose = false;
+			auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
+			                 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
+			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
+			auto &e = emit_op(ops[0], ops[1], expr, forward);
+			e.need_transpose = true;
+			a->need_transpose = true;
+			b->need_transpose = true;
+			inherit_expression_dependencies(ops[1], ops[2]);
+			inherit_expression_dependencies(ops[1], ops[3]);
+		}
+		else
+			GLSL_BOP(*);
+
+		break;
+	}
+
 	case OpFMul:
 	case OpMatrixTimesScalar:
 	case OpVectorTimesScalar:
-	case OpMatrixTimesMatrix:
 		GLSL_BOP(*);
 		break;
 
@@ -8170,7 +8766,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_id = ops[1];
 		uint32_t op0 = ops[2];
 		uint32_t op1 = ops[3];
-		forced_temporaries.insert(result_id);
 		auto &type = get<SPIRType>(result_type);
 		emit_uninitialized_temporary_expression(result_type, result_id);
 		const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
@@ -8279,7 +8874,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &type = get<SPIRType>(result_type);
 
 		if (type.vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
 		else
 			GLSL_BOP(||);
 		break;
@@ -8293,7 +8888,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &type = get<SPIRType>(result_type);
 
 		if (type.vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
 		else
 			GLSL_BOP(&&);
 		break;
@@ -8350,7 +8945,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpUGreaterThan:
 	case OpSGreaterThan:
 	{
-		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(greaterThan, type);
 		else
@@ -8370,7 +8965,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpUGreaterThanEqual:
 	case OpSGreaterThanEqual:
 	{
-		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(greaterThanEqual, type);
 		else
@@ -8390,7 +8985,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpULessThan:
 	case OpSLessThan:
 	{
-		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThan ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(lessThan, type);
 		else
@@ -8410,7 +9005,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpULessThanEqual:
 	case OpSLessThanEqual:
 	{
-		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
 			GLSL_BFOP_CAST(lessThanEqual, type);
 		else
@@ -8618,23 +9213,36 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 	// Bitfield
 	case OpBitFieldInsert:
-		// TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
-		GLSL_QFOP(bitfieldInsert);
+	{
+		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
 		break;
+	}
 
 	case OpBitFieldSExtract:
+	{
+		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
+		                                SPIRType::Int, SPIRType::Int);
+		break;
+	}
+
 	case OpBitFieldUExtract:
-		// TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
-		GLSL_TFOP(bitfieldExtract);
+	{
+		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
+		                                SPIRType::Int, SPIRType::Int);
 		break;
+	}
 
 	case OpBitReverse:
+		// BitReverse does not have issues with sign since result type must match input type.
 		GLSL_UFOP(bitfieldReverse);
 		break;
 
 	case OpBitCount:
-		GLSL_UFOP(bitCount);
+	{
+		auto basetype = expression_type(ops[2]).basetype;
+		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
 		break;
+	}
 
 	// Atomics
 	case OpAtomicExchange:
@@ -8823,7 +9431,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 		// When using the image, we need to know which variable it is actually loaded from.
 		auto *var = maybe_get_backing_variable(ops[2]);
-		e.loaded_from = var ? var->self : 0;
+		e.loaded_from = var ? var->self : ID(0);
 		break;
 	}
 
@@ -8883,6 +9491,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
+		inherit_expression_dependencies(id, ops[2]);
+		inherit_expression_dependencies(id, ops[3]);
 		break;
 	}
 
@@ -9044,7 +9654,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 		// When using the pointer, we need to know which variable it is actually loaded from.
 		auto *var = maybe_get_backing_variable(ops[2]);
-		e.loaded_from = var ? var->self : 0;
+		e.loaded_from = var ? var->self : ID(0);
 		break;
 	}
 
@@ -9304,6 +9914,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		{
 			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
+		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
+		{
+			break; // Ignore SPIR-V debug information extended instructions.
+		}
 		else
 		{
 			statement("// unimplemented ext op ", instruction.op);
@@ -9495,28 +10109,98 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 
 	case OpFUnordEqual:
-		GLSL_BFOP(unsupported_FUnordEqual);
-		break;
-
 	case OpFUnordNotEqual:
-		GLSL_BFOP(unsupported_FUnordNotEqual);
-		break;
-
 	case OpFUnordLessThan:
-		GLSL_BFOP(unsupported_FUnordLessThan);
-		break;
-
 	case OpFUnordGreaterThan:
-		GLSL_BFOP(unsupported_FUnordGreaterThan);
-		break;
-
 	case OpFUnordLessThanEqual:
-		GLSL_BFOP(unsupported_FUnordLessThanEqual);
-		break;
-
 	case OpFUnordGreaterThanEqual:
-		GLSL_BFOP(unsupported_FUnordGreaterThanEqual);
+	{
+		// GLSL doesn't specify if floating point comparisons are ordered or unordered,
+		// but glslang always emits ordered floating point compares for GLSL.
+		// To get unordered compares, we can test the opposite thing and invert the result.
+		// This way, we force true when there is any NaN present.
+		uint32_t op0 = ops[2];
+		uint32_t op1 = ops[3];
+
+		string expr;
+		if (expression_type(op0).vecsize > 1)
+		{
+			const char *comp_op = nullptr;
+			switch (opcode)
+			{
+			case OpFUnordEqual:
+				comp_op = "notEqual";
+				break;
+
+			case OpFUnordNotEqual:
+				comp_op = "equal";
+				break;
+
+			case OpFUnordLessThan:
+				comp_op = "greaterThanEqual";
+				break;
+
+			case OpFUnordLessThanEqual:
+				comp_op = "greaterThan";
+				break;
+
+			case OpFUnordGreaterThan:
+				comp_op = "lessThanEqual";
+				break;
+
+			case OpFUnordGreaterThanEqual:
+				comp_op = "lessThan";
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+
+			expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
+		}
+		else
+		{
+			const char *comp_op = nullptr;
+			switch (opcode)
+			{
+			case OpFUnordEqual:
+				comp_op = " != ";
+				break;
+
+			case OpFUnordNotEqual:
+				comp_op = " == ";
+				break;
+
+			case OpFUnordLessThan:
+				comp_op = " >= ";
+				break;
+
+			case OpFUnordLessThanEqual:
+				comp_op = " > ";
+				break;
+
+			case OpFUnordGreaterThan:
+				comp_op = " <= ";
+				break;
+
+			case OpFUnordGreaterThanEqual:
+				comp_op = " < ";
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+
+			expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
+		}
+
+		emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
+		inherit_expression_dependencies(ops[1], op0);
+		inherit_expression_dependencies(ops[1], op1);
 		break;
+	}
 
 	case OpReportIntersectionNV:
 		statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
@@ -9564,6 +10248,57 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// Undefined value has been declared.
 		break;
 
+	case OpLine:
+	{
+		emit_line_directive(ops[0], ops[1]);
+		break;
+	}
+
+	case OpNoLine:
+		break;
+
+	case OpDemoteToHelperInvocationEXT:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
+		require_extension_internal("GL_EXT_demote_to_helper_invocation");
+		statement(backend.demote_literal, ";");
+		break;
+
+	case OpIsHelperInvocationEXT:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
+		require_extension_internal("GL_EXT_demote_to_helper_invocation");
+		emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
+		break;
+
+	case OpBeginInvocationInterlockEXT:
+		// If the interlock is complex, we emit this elsewhere.
+		if (!interlocked_is_complex)
+		{
+			if (options.es)
+				statement("beginInvocationInterlockNV();");
+			else
+				statement("beginInvocationInterlockARB();");
+
+			flush_all_active_variables();
+			// Make sure forwarding doesn't propagate outside interlock region.
+		}
+		break;
+
+	case OpEndInvocationInterlockEXT:
+		// If the interlock is complex, we emit this elsewhere.
+		if (!interlocked_is_complex)
+		{
+			if (options.es)
+				statement("endInvocationInterlockNV();");
+			else
+				statement("endInvocationInterlockARB();");
+
+			flush_all_active_variables();
+			// Make sure forwarding doesn't propagate outside interlock region.
+		}
+		break;
+
 	default:
 		statement("// unimplemented op ", instruction.op);
 		break;
@@ -9593,12 +10328,18 @@ void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t in
 		if (var_id)
 			flush_variable_declaration(var_id);
 
-		arglist.push_back(to_func_call_arg(arg.id));
+		arglist.push_back(to_func_call_arg(arg, arg.id));
 	}
 }
 
 string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
 {
+	if (type.type_alias != TypeID(0) &&
+	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
+	{
+		return to_member_name(get<SPIRType>(type.type_alias), index);
+	}
+
 	auto &memb = ir.meta[type.self].members;
 	if (index < memb.size() && !memb[index].alias.empty())
 		return memb[index].alias;
@@ -9674,21 +10415,50 @@ bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, u
 	return true;
 }
 
+// Checks if we need to remap physical type IDs when declaring the type in a buffer.
+bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
+{
+	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
+}
+
 // Checks whether the member is in packed data type, that might need to be unpacked.
-// GLSL does not define packed data types, but certain subclasses do.
-bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const
+bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
 {
-	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked);
+	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
 }
 
 // Wraps the expression string in a function call that converts the
 // row_major matrix result of the expression to a column_major matrix.
 // Base implementation uses the standard library transpose() function.
 // Subclasses may override to use a different function.
-string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/)
+string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
+                                              bool /*is_packed*/)
 {
 	strip_enclosed_expression(exp_str);
-	return join("transpose(", exp_str, ")");
+	if (!is_matrix(exp_type))
+	{
+		auto column_index = exp_str.find_last_of('[');
+		if (column_index == string::npos)
+			return exp_str;
+
+		auto column_expr = exp_str.substr(column_index);
+		exp_str.resize(column_index);
+
+		auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
+
+		// Loading a column from a row-major matrix. Unroll the load.
+		for (uint32_t c = 0; c < exp_type.vecsize; c++)
+		{
+			transposed_expr += join(exp_str, '[', c, ']', column_expr);
+			if (c + 1 < exp_type.vecsize)
+				transposed_expr += ", ";
+		}
+
+		transposed_expr += ")";
+		return transposed_expr;
+	}
+	else
+		return join("transpose(", exp_str, ")");
 }
 
 string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
@@ -9721,6 +10491,10 @@ void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
 	          variable_decl(membertype, to_member_name(type, index)), ";");
 }
 
+void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
+{
+}
+
 const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
 {
 	// GL_EXT_buffer_reference variables can be marked as restrict.
@@ -9779,7 +10553,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
 
 const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
 {
-	return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags);
+	auto &type = expression_type(id);
+	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
+	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
+	{
+		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
+		auto &result_type = get<SPIRType>(type.image.type);
+		if (result_type.width < 32)
+			return "mediump ";
+	}
+	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
 }
 
 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
@@ -9995,15 +10778,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
 	switch (imagetype.basetype)
 	{
 	case SPIRType::Int:
+	case SPIRType::Short:
+	case SPIRType::SByte:
 		res = "i";
 		break;
 	case SPIRType::UInt:
+	case SPIRType::UShort:
+	case SPIRType::UByte:
 		res = "u";
 		break;
 	default:
 		break;
 	}
 
+	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
+	// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
+
 	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
 		return res + "subpassInput" + (type.image.ms ? "MS" : "");
 
@@ -10312,7 +11102,7 @@ void CompilerGLSL::require_extension_internal(const string &ext)
 	}
 }
 
-void CompilerGLSL::flatten_buffer_block(uint32_t id)
+void CompilerGLSL::flatten_buffer_block(VariableID id)
 {
 	auto &var = get<SPIRVariable>(id);
 	auto &type = get<SPIRType>(var.basetype);
@@ -10428,7 +11218,13 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
 
 	if (func.self == ir.default_entry_point)
 	{
-		decl += "main";
+		// If we need complex fallback in GLSL, we just wrap main() in a function
+		// and interlock the entire shader ...
+		if (interlocked_is_complex)
+			decl += "spvMainInterlockedBody";
+		else
+			decl += "main";
+
 		processing_entry_point = true;
 	}
 	else
@@ -10503,6 +11299,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
 		}
 	}
 
+	if (func.entry_line.file_id != 0)
+		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
 	emit_function_prototype(func, return_flags);
 	begin_scope();
 
@@ -10523,6 +11321,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
 	for (auto &v : func.local_variables)
 	{
 		auto &var = get<SPIRVariable>(v);
+		var.deferred_declaration = false;
+
 		if (var.storage == StorageClassWorkgroup)
 		{
 			// Special variable type which cannot have initializer,
@@ -10582,15 +11382,29 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
 			var.deferred_declaration = false;
 	}
 
+	// Enforce declaration order for regression testing purposes.
+	for (auto &block_id : func.blocks)
+	{
+		auto &block = get<SPIRBlock>(block_id);
+		sort(begin(block.dominated_variables), end(block.dominated_variables));
+	}
+
 	for (auto &line : current_function->fixup_hooks_in)
 		line();
 
-	entry_block.loop_dominator = SPIRBlock::NoDominator;
 	emit_block_chain(entry_block);
 
 	end_scope();
 	processing_entry_point = false;
 	statement("");
+
+	// Make sure deferred declaration state for local variables is cleared when we are done with function.
+	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
+	for (auto &v : func.local_variables)
+	{
+		auto &var = get<SPIRVariable>(v);
+		var.deferred_declaration = false;
+	}
 }
 
 void CompilerGLSL::emit_fixup()
@@ -10609,18 +11423,11 @@ void CompilerGLSL::emit_fixup()
 	}
 }
 
-bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
-{
-	auto &child = get<SPIRBlock>(to);
-	for (auto &phi : child.phi_variables)
-		if (phi.parent == from)
-			return true;
-	return false;
-}
-
-void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
+void CompilerGLSL::flush_phi(BlockID from, BlockID to)
 {
 	auto &child = get<SPIRBlock>(to);
+	if (child.ignore_phi_from_block == from)
+		return;
 
 	unordered_set<uint32_t> temporary_phi_variables;
 
@@ -10645,7 +11452,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
 				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
 				bool need_saved_temporary =
 				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
-					    return future_phi.local_variable == phi.function_variable && future_phi.parent == from;
+					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
 				    }) != end(child.phi_variables);
 
 				if (need_saved_temporary)
@@ -10680,7 +11487,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
 	}
 }
 
-void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
+void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
 {
 	auto &to_block = get<SPIRBlock>(to);
 	if (from == to)
@@ -10691,16 +11498,11 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
 	{
 		// Just emit the whole block chain as is.
 		auto usage_counts = expression_usage_counts;
-		auto invalid = invalid_expressions;
 
 		emit_block_chain(to_block);
 
-		// Expression usage counts and invalid expressions
-		// are moot after returning from the continue block.
-		// Since we emit the same block multiple times,
-		// we don't want to invalidate ourselves.
+		// Expression usage counts are moot after returning from the continue block.
 		expression_usage_counts = usage_counts;
-		invalid_expressions = invalid;
 	}
 	else
 	{
@@ -10715,23 +11517,23 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
 			// so just use "self" here.
 			loop_dominator = from;
 		}
-		else if (from_block.loop_dominator != SPIRBlock::NoDominator)
+		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
 		{
 			loop_dominator = from_block.loop_dominator;
 		}
 
 		if (loop_dominator != 0)
 		{
-			auto &dominator = get<SPIRBlock>(loop_dominator);
+			auto &cfg = get_cfg_for_current_function();
 
 			// For non-complex continue blocks, we implicitly branch to the continue block
 			// by having the continue block be part of the loop header in for (; ; continue-block).
-			outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
+			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
 		}
 
 		// Some simplification for for-loops. We always end up with a useless continue;
 		// statement since we branch to a loop block.
-		// Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
+		// Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
 		// we can avoid writing out an explicit continue statement.
 		// Similar optimization to return statements if we know we're outside flow control.
 		if (!outside_control_flow)
@@ -10739,11 +11541,12 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
 	}
 }
 
-void CompilerGLSL::branch(uint32_t from, uint32_t to)
+void CompilerGLSL::branch(BlockID from, BlockID to)
 {
 	flush_phi(from, to);
 	flush_control_dependent_expressions(from);
-	flush_all_active_variables();
+
+	bool to_is_continue = is_continue(to);
 
 	// This is only a continue if we branch to our loop dominator.
 	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
@@ -10760,7 +11563,8 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
 		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
 		// write to the ladder here, and defer the break.
 		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
-		if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u &&
+		if (current_emitting_switch && is_loop_break(to) &&
+		    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
 		    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
 		{
 			if (!current_emitting_switch->need_ladder_break)
@@ -10773,12 +11577,25 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
 		}
 		statement("break;");
 	}
-	else if (is_continue(to) || (from == to))
+	else if (to_is_continue || from == to)
 	{
 		// For from == to case can happen for a do-while loop which branches into itself.
 		// We don't mark these cases as continue blocks, but the only possible way to branch into
 		// ourselves is through means of continue blocks.
-		branch_to_continue(from, to);
+
+		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
+		// We can branch to the continue block after we merge execution.
+
+		// Here we make use of structured control flow rules from spec:
+		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
+		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
+		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
+		auto &block_meta = ir.block_meta[to];
+		bool branching_to_merge =
+		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
+		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
+		if (!to_is_continue || !branching_to_merge)
+			branch_to_continue(from, to);
 	}
 	else if (!is_conditional(to))
 		emit_block_chain(get<SPIRBlock>(to));
@@ -10789,12 +11606,19 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
 	// Inner scope always takes precedence.
 }
 
-void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
+void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
 {
-	// If we branch directly to a selection merge target, we don't really need a code path.
+	auto &from_block = get<SPIRBlock>(from);
+	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
+
+	// If we branch directly to a selection merge target, we don't need a code path.
+	// This covers both merge out of if () / else () as well as a break for switch blocks.
 	bool true_sub = !is_conditional(true_block);
 	bool false_sub = !is_conditional(false_block);
 
+	bool true_block_is_selection_merge = true_block == merge_block;
+	bool false_block_is_selection_merge = false_block == merge_block;
+
 	if (true_sub)
 	{
 		emit_block_hints(get<SPIRBlock>(from));
@@ -10803,7 +11627,11 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
 		branch(from, true_block);
 		end_scope();
 
-		if (false_sub || is_continue(false_block) || is_break(false_block))
+		// If we merge to continue, we handle that explicitly in emit_block_chain(),
+		// so there is no need to branch to it directly here.
+		// break; is required to handle ladder fallthrough cases, so keep that in for now, even
+		// if we could potentially handle it in emit_block_chain().
+		if (false_sub || (!false_block_is_selection_merge && is_continue(false_block)) || is_break(false_block))
 		{
 			statement("else");
 			begin_scope();
@@ -10818,7 +11646,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
 			end_scope();
 		}
 	}
-	else if (false_sub && !true_sub)
+	else if (false_sub)
 	{
 		// Only need false path, use negative conditional.
 		emit_block_hints(get<SPIRBlock>(from));
@@ -10827,7 +11655,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
 		branch(from, false_block);
 		end_scope();
 
-		if (is_continue(true_block) || is_break(true_block))
+		if ((!true_block_is_selection_merge && is_continue(true_block)) || is_break(true_block))
 		{
 			statement("else");
 			begin_scope();
@@ -10844,44 +11672,6 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
 	}
 }
 
-void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block)
-{
-	// Propagate down the loop dominator block, so that dominated blocks can back trace.
-	if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator)
-	{
-		uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator;
-
-		auto set_dominator = [this](uint32_t self, uint32_t new_dominator) {
-			auto &dominated_block = this->get<SPIRBlock>(self);
-
-			// If we already have a loop dominator, we're trying to break out to merge targets
-			// which should not update the loop dominator.
-			if (!dominated_block.loop_dominator)
-				dominated_block.loop_dominator = new_dominator;
-		};
-
-		// After merging a loop, we inherit the loop dominator always.
-		if (block.merge_block)
-			set_dominator(block.merge_block, block.loop_dominator);
-
-		if (block.true_block)
-			set_dominator(block.true_block, dominator);
-		if (block.false_block)
-			set_dominator(block.false_block, dominator);
-		if (block.next_block)
-			set_dominator(block.next_block, dominator);
-		if (block.default_block)
-			set_dominator(block.default_block, dominator);
-
-		for (auto &c : block.cases)
-			set_dominator(c.block, dominator);
-
-		// In older glslang output continue_block can be == loop header.
-		if (block.continue_block && block.continue_block != block.self)
-			set_dominator(block.continue_block, dominator);
-	}
-}
-
 // FIXME: This currently cannot handle complex continue blocks
 // as in do-while.
 // This should be seen as a "trivial" continue block.
@@ -10902,7 +11692,6 @@ string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_tr
 	// Stamp out all blocks one after each other.
 	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
 	{
-		propagate_loop_dominators(*block);
 		// Write out all instructions we have in this block.
 		emit_block_instructions(*block);
 
@@ -11114,7 +11903,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 			}
 
 			default:
-				SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+				block.disable_block_optimization = true;
+				force_recompile();
+				begin_scope(); // We'll see an end_scope() later.
+				return false;
 			}
 
 			begin_scope();
@@ -11146,7 +11938,6 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 
 		if (current_count == statement_count && condition_is_temporary)
 		{
-			propagate_loop_dominators(child);
 			uint32_t target_block = child.true_block;
 
 			switch (continue_type)
@@ -11189,7 +11980,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 			}
 
 			default:
-				SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+				block.disable_block_optimization = true;
+				force_recompile();
+				begin_scope(); // We'll see an end_scope() later.
+				return false;
 			}
 
 			begin_scope();
@@ -11210,18 +12004,16 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 
 void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
 {
-	// Enforce declaration order for regression testing purposes.
-	sort(begin(block.dominated_variables), end(block.dominated_variables));
 	for (auto &v : block.dominated_variables)
 		flush_variable_declaration(v);
 }
 
-void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<uint32_t, uint32_t>> &temporaries)
+void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
 {
 	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
 	// Need to sort these to ensure that reference output is stable.
 	sort(begin(temporaries), end(temporaries),
-	     [](const pair<uint32_t, uint32_t> &a, const pair<uint32_t, uint32_t> &b) { return a.second < b.second; });
+	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
 
 	for (auto &tmp : temporaries)
 	{
@@ -11240,8 +12032,6 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<uint32_t, uint32_t>
 
 void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 {
-	propagate_loop_dominators(block);
-
 	bool select_branch_to_true_block = false;
 	bool select_branch_to_false_block = false;
 	bool skip_direct_branch = false;
@@ -11255,8 +12045,22 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
 
 	// If we have loop variables, stop masking out access to the variable now.
-	for (auto var : block.loop_variables)
-		get<SPIRVariable>(var).loop_variable_enable = true;
+	for (auto var_id : block.loop_variables)
+	{
+		auto &var = get<SPIRVariable>(var_id);
+		var.loop_variable_enable = true;
+		// We're not going to declare the variable directly, so emit a copy here.
+		emit_variable_temporary_copies(var);
+	}
+
+	// Remember deferred declaration state. We will restore it before returning.
+	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
+	for (size_t i = 0; i < block.dominated_variables.size(); i++)
+	{
+		uint32_t var_id = block.dominated_variables[i];
+		auto &var = get<SPIRVariable>(var_id);
+		rearm_dominated_variables[i] = var.deferred_declaration;
+	}
 
 	// This is the method often used by spirv-opt to implement loops.
 	// The loop header goes straight into the continue block.
@@ -11416,7 +12220,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	case SPIRBlock::MultiSelect:
 	{
 		auto &type = expression_type(block.condition);
-		bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort;
+		bool unsigned_case =
+		    type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
 
 		if (block.merge == SPIRBlock::MergeNone)
 			SPIRV_CROSS_THROW("Switch statement is not structured");
@@ -11441,61 +12246,182 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		if (block.need_ladder_break)
 			statement("bool _", block.self, "_ladder_break = false;");
 
+		// Find all unique case constructs.
+		unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
+		SmallVector<uint32_t> block_declaration_order;
+		SmallVector<uint32_t> literals_to_merge;
+
+		// If a switch case branches to the default block for some reason, we can just remove that literal from consideration
+		// and let the default: block handle it.
+		// 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
+		// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
+		for (auto &c : block.cases)
+		{
+			if (c.block != block.next_block && c.block != block.default_block)
+			{
+				if (!case_constructs.count(c.block))
+					block_declaration_order.push_back(c.block);
+				case_constructs[c.block].push_back(c.value);
+			}
+			else if (c.block == block.next_block && block.default_block != block.next_block)
+			{
+				// We might have to flush phi inside specific case labels.
+				// If we can piggyback on default:, do so instead.
+				literals_to_merge.push_back(c.value);
+			}
+		}
+
+		// Empty literal array -> default.
+		if (block.default_block != block.next_block)
+		{
+			auto &default_block = get<SPIRBlock>(block.default_block);
+
+			// We need to slide in the default block somewhere in this chain
+			// if there are fall-through scenarios since the default is declared separately in OpSwitch.
+			// Only consider trivial fall-through cases here.
+			size_t num_blocks = block_declaration_order.size();
+			bool injected_block = false;
+
+			for (size_t i = 0; i < num_blocks; i++)
+			{
+				auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
+				if (execution_is_direct_branch(case_block, default_block))
+				{
+					// Fallthrough to default block, we must inject the default block here.
+					block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
+					injected_block = true;
+					break;
+				}
+				else if (execution_is_direct_branch(default_block, case_block))
+				{
+					// Default case is falling through to another case label, we must inject the default block here.
+					block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
+					injected_block = true;
+					break;
+				}
+			}
+
+			// Order does not matter.
+			if (!injected_block)
+				block_declaration_order.push_back(block.default_block);
+
+			case_constructs[block.default_block] = {};
+		}
+
+		size_t num_blocks = block_declaration_order.size();
+
+		const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
+			return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
+		};
+
+		// We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
+		// we need to flush phi nodes outside the switch block in a branch,
+		// and skip any Phi handling inside the case label to make fall-through work as expected.
+		// This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
+		// inside the case label if at all possible.
+		for (size_t i = 1; i < num_blocks; i++)
+		{
+			if (flush_phi_required(block.self, block_declaration_order[i]) &&
+			    flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
+			{
+				uint32_t target_block = block_declaration_order[i];
+
+				// Make sure we flush Phi, it might have been marked to be ignored earlier.
+				get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
+
+				auto &literals = case_constructs[target_block];
+
+				if (literals.empty())
+				{
+					// Oh boy, gotta make a complete negative test instead! o.o
+					// Find all possible literals that would *not* make us enter the default block.
+					// If none of those literals match, we flush Phi ...
+					SmallVector<string> conditions;
+					for (size_t j = 0; j < num_blocks; j++)
+					{
+						auto &negative_literals = case_constructs[block_declaration_order[j]];
+						for (auto &case_label : negative_literals)
+							conditions.push_back(join(to_enclosed_expression(block.condition),
+							                          " != ", to_case_label(case_label, unsigned_case)));
+					}
+
+					statement("if (", merge(conditions, " && "), ")");
+					begin_scope();
+					flush_phi(block.self, target_block);
+					end_scope();
+				}
+				else
+				{
+					SmallVector<string> conditions;
+					conditions.reserve(literals.size());
+					for (auto &case_label : literals)
+						conditions.push_back(join(to_enclosed_expression(block.condition),
+						                          " == ", to_case_label(case_label, unsigned_case)));
+					statement("if (", merge(conditions, " || "), ")");
+					begin_scope();
+					flush_phi(block.self, target_block);
+					end_scope();
+				}
+
+				// Mark the block so that we don't flush Phi from header to case label.
+				get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
+			}
+		}
+
 		emit_block_hints(block);
 		statement("switch (", to_expression(block.condition), ")");
 		begin_scope();
 
-		// Multiple case labels can branch to same block, so find all unique blocks.
-		bool emitted_default = false;
-		unordered_set<uint32_t> emitted_blocks;
-
-		for (auto &c : block.cases)
+		for (size_t i = 0; i < num_blocks; i++)
 		{
-			if (emitted_blocks.count(c.block) != 0)
-				continue;
+			uint32_t target_block = block_declaration_order[i];
+			auto &literals = case_constructs[target_block];
 
-			// Emit all case labels which branch to our target.
-			// FIXME: O(n^2), revisit if we hit shaders with 100++ case labels ...
-			for (auto &other_case : block.cases)
+			if (literals.empty())
 			{
-				if (other_case.block == c.block)
+				// Default case.
+				statement("default:");
+			}
+			else
+			{
+				for (auto &case_literal : literals)
 				{
 					// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
-					auto case_value = unsigned_case ? convert_to_string(uint32_t(other_case.value)) :
-					                                  convert_to_string(int32_t(other_case.value));
-					statement("case ", case_value, label_suffix, ":");
+					statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
 				}
 			}
 
-			// Maybe we share with default block?
-			if (block.default_block == c.block)
+			auto &case_block = get<SPIRBlock>(target_block);
+			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
+			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
 			{
-				statement("default:");
-				emitted_default = true;
+				// We will fall through here, so just terminate the block chain early.
+				// We still need to deal with Phi potentially.
+				// No need for a stack-like thing here since we only do fall-through when there is a
+				// single trivial branch to fall-through target.
+				current_emitting_switch_fallthrough = true;
 			}
-
-			// Complete the target.
-			emitted_blocks.insert(c.block);
+			else
+				current_emitting_switch_fallthrough = false;
 
 			begin_scope();
-			branch(block.self, c.block);
+			branch(block.self, target_block);
 			end_scope();
+
+			current_emitting_switch_fallthrough = false;
 		}
 
-		if (!emitted_default)
+		// Might still have to flush phi variables if we branch from loop header directly to merge target.
+		if (flush_phi_required(block.self, block.next_block))
 		{
-			if (block.default_block != block.next_block)
-			{
-				statement("default:");
-				begin_scope();
-				if (is_break(block.default_block))
-					SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ...");
-				branch(block.self, block.default_block);
-				end_scope();
-			}
-			else if (flush_phi_required(block.self, block.next_block))
+			if (block.default_block == block.next_block || !literals_to_merge.empty())
 			{
-				statement("default:");
+				for (auto &case_literal : literals_to_merge)
+					statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
+
+				if (block.default_block == block.next_block)
+					statement("default:");
+
 				begin_scope();
 				flush_phi(block.self, block.next_block);
 				statement("break;");
@@ -11518,12 +12444,15 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	}
 
 	case SPIRBlock::Return:
+	{
 		for (auto &line : current_function->fixup_hooks_out)
 			line();
 
 		if (processing_entry_point)
 			emit_fixup();
 
+		auto &cfg = get_cfg_for_current_function();
+
 		if (block.return_value)
 		{
 			auto &type = expression_type(block.return_value);
@@ -11532,10 +12461,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 				// If we cannot return arrays, we will have a special out argument we can write to instead.
 				// The backend is responsible for setting this up, and redirection the return values as appropriate.
 				if (ir.ids[block.return_value].get_type() != TypeUndef)
-					emit_array_copy("SPIRV_Cross_return_value", block.return_value);
+				{
+					emit_array_copy("SPIRV_Cross_return_value", block.return_value, StorageClassFunction,
+					                get_backing_variable_storage(block.return_value));
+				}
 
-				if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
-				    block.loop_dominator != SPIRBlock::NoDominator)
+				if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
+				    block.loop_dominator != BlockID(SPIRBlock::NoDominator))
 				{
 					statement("return;");
 				}
@@ -11547,16 +12479,17 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 					statement("return ", to_expression(block.return_value), ";");
 			}
 		}
-		// If this block is the very final block and not called from control flow,
-		// we do not need an explicit return which looks out of place. Just end the function here.
-		// In the very weird case of for(;;) { return; } executing return is unconditional,
-		// but we actually need a return here ...
-		else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
-		         block.loop_dominator != SPIRBlock::NoDominator)
+		else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
+		         block.loop_dominator != BlockID(SPIRBlock::NoDominator))
 		{
+			// If this block is the very final block and not called from control flow,
+			// we do not need an explicit return which looks out of place. Just end the function here.
+			// In the very weird case of for(;;) { return; } executing return is unconditional,
+			// but we actually need a return here ...
 			statement("return;");
 		}
 		break;
+	}
 
 	case SPIRBlock::Kill:
 		statement(backend.discard_literal, ";");
@@ -11577,22 +12510,26 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		if (block.merge != SPIRBlock::MergeSelection)
 			flush_phi(block.self, block.next_block);
 
-		// For merge selects we might have ignored the fact that a merge target
-		// could have been a break; or continue;
-		// We will need to deal with it here.
-		if (is_loop_break(block.next_block))
-		{
-			// Cannot check for just break, because switch statements will also use break.
-			assert(block.merge == SPIRBlock::MergeSelection);
-			statement("break;");
-		}
-		else if (is_continue(block.next_block))
+		// For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
+		if (!current_emitting_switch_fallthrough)
 		{
-			assert(block.merge == SPIRBlock::MergeSelection);
-			branch_to_continue(block.self, block.next_block);
+			// For merge selects we might have ignored the fact that a merge target
+			// could have been a break; or continue;
+			// We will need to deal with it here.
+			if (is_loop_break(block.next_block))
+			{
+				// Cannot check for just break, because switch statements will also use break.
+				assert(block.merge == SPIRBlock::MergeSelection);
+				statement("break;");
+			}
+			else if (is_continue(block.next_block))
+			{
+				assert(block.merge == SPIRBlock::MergeSelection);
+				branch_to_continue(block.self, block.next_block);
+			}
+			else if (BlockID(block.self) != block.next_block)
+				emit_block_chain(get<SPIRBlock>(block.next_block));
 		}
-		else if (block.self != block.next_block)
-			emit_block_chain(get<SPIRBlock>(block.next_block));
 	}
 
 	if (block.merge == SPIRBlock::MergeLoop)
@@ -11636,6 +12573,20 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 
 	// Forget about control dependent expressions now.
 	block.invalidate_expressions.clear();
+
+	// After we return, we must be out of scope, so if we somehow have to re-emit this function,
+	// re-declare variables if necessary.
+	assert(rearm_dominated_variables.size() == block.dominated_variables.size());
+	for (size_t i = 0; i < block.dominated_variables.size(); i++)
+	{
+		uint32_t var = block.dominated_variables[i];
+		get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
+	}
+
+	// Just like for deferred declaration, we need to forget about loop variable enable
+	// if our block chain is reinstantiated later.
+	for (auto &var_id : block.loop_variables)
+		get<SPIRVariable>(var_id).loop_variable_enable = false;
 }
 
 void CompilerGLSL::begin_scope()
@@ -11652,6 +12603,14 @@ void CompilerGLSL::end_scope()
 	statement("}");
 }
 
+void CompilerGLSL::end_scope(const string &trailer) // Like end_scope(), but also hands `trailer` to statement() after the "}".
+{
+	if (!indent) // Indent counter is already zero: report it rather than decrementing below zero.
+		SPIRV_CROSS_THROW("Popping empty indent stack.");
+	indent--; // Leave the scope first, then pass the closing brace to statement().
+	statement("}", trailer);
+}
+
 void CompilerGLSL::end_scope_decl()
 {
 	if (!indent)
@@ -11708,7 +12667,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
 	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
 }
 
-void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
+void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass) // The two StorageClass parameters are unnamed and unused in this implementation.
 {
 	statement(lhs, " = ", to_expression(rhs_id), ";");
 }
@@ -11793,6 +12752,7 @@ void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &ex
 	case BuiltInBaseVertex:
 	case BuiltInBaseInstance:
 	case BuiltInDrawIndex:
+	case BuiltInFragStencilRefEXT:
 		expected_type = SPIRType::Int;
 		break;
 
@@ -11828,6 +12788,7 @@ void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &exp
 	case BuiltInLayer:
 	case BuiltInPrimitiveId:
 	case BuiltInViewportIndex:
+	case BuiltInFragStencilRefEXT:
 		expected_type = SPIRType::Int;
 		break;
 
@@ -11898,3 +12859,124 @@ void CompilerGLSL::reset_name_caches()
 	block_names.clear();
 	function_overloads.clear();
 }
+
+void CompilerGLSL::fixup_type_alias()
+{
+	// Pass 1: due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
+	// FIXME: Multiple alias types which are both block-like will be awkward; for now, it's best to just drop the type
+	// alias if the slave type is a block type (that drop happens in the second loop of this function).
+	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
+		if (type.type_alias && type_is_block_like(type))
+		{
+			// Become the master: every other type that aliases the same master is redirected to this type.
+			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
+				if (other_id == type.self) // Do not redirect ourselves.
+					return;
+
+				if (other_type.type_alias == type.type_alias)
+					other_type.type_alias = type.self;
+			});
+
+			this->get<SPIRType>(type.type_alias).type_alias = self; // The previous master now aliases this type ...
+			type.type_alias = 0; // ... and this type no longer aliases anything.
+		}
+	});
+
+	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) { // Pass 2: drop aliases that are not allowed or not useful.
+		if (type.type_alias && type_is_block_like(type))
+		{
+			// A block-like type that is still an alias at this point is not allowed, drop the type_alias.
+			type.type_alias = 0;
+		}
+		else if (type.type_alias && !type_is_block_like(this->get<SPIRType>(type.type_alias)))
+		{
+			// If the alias master is not a block-like type, there is no reason to use type aliasing.
+			// This case can happen if two structs are declared with the same name, but they are unrelated.
+			// Aliases are only used to deal with aliased types for structs which are used in different buffer types
+			// which all create a variant of the same struct with different DecorationOffset values.
+			type.type_alias = 0;
+		}
+	});
+}
+
+void CompilerGLSL::reorder_type_alias()
+{
+	// Reorder declaration of types so that the master of the type alias is always emitted first.
+	// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type
+	// ABuffer, which means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
+	auto loop_lock = ir.create_loop_hard_lock(); // Held until return. NOTE(review): presumably guards the ID lists while we iterate - confirm.
+
+	auto &type_ids = ir.ids_for_type[TypeType];
+	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
+	{
+		auto &type = get<SPIRType>(*alias_itr);
+		if (type.type_alias != TypeID(0) &&
+		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) // Aliases of a master decorated BufferBlockRepacked are not reordered.
+		{
+			// We will skip declaring this type, so make sure the type_alias type comes before.
+			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
+			assert(master_itr != end(type_ids));
+
+			if (alias_itr < master_itr) // The alias currently precedes its master: exchange the two entries.
+			{
+				// Must also swap the type order for the constant-type joined array.
+				auto &joined_types = ir.ids_for_constant_or_type;
+				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
+				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
+				assert(alt_alias_itr != end(joined_types));
+				assert(alt_master_itr != end(joined_types));
+
+				swap(*alias_itr, *master_itr); // Swap within the per-type ID list ...
+				swap(*alt_alias_itr, *alt_master_itr); // ... and within the joined constant/type list.
+			}
+		}
+	}
+}
+
+void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
+{
+	// Emits `#line <line_literal> "<file>"` when options.emit_line_directives is set; file_id must name a SPIRString.
+	// If we are redirecting statements (common case here is continue blocks), ignore the line directive.
+	if (redirect_statement)
+		return;
+
+	if (options.emit_line_directives)
+	{
+		require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); // Request the extension this cpp-style (quoted file name) directive relies on.
+		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\""); // Emitted without indentation.
+	}
+}
+
+void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
+{
+	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
+	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
+	// Back-propagate the qualifier from `id`, recursively, along the expression dependency chain.
+
+	if (!has_decoration(id, DecorationNonUniformEXT))
+	{
+		set_decoration(id, DecorationNonUniformEXT);
+		force_recompile(); // Decoration state changed. NOTE(review): presumably already-emitted code must be regenerated - confirm.
+	}
+
+	auto *e = maybe_get<SPIRExpression>(id);
+	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
+	auto *chain = maybe_get<SPIRAccessChain>(id);
+	if (e) // Only the first non-null kind is followed: expression, then combined image sampler, then access chain.
+	{
+		for (auto &expr : e->expression_dependencies)
+			propagate_nonuniform_qualifier(expr);
+		for (auto &expr : e->implied_read_expressions)
+			propagate_nonuniform_qualifier(expr);
+	}
+	else if (combined)
+	{
+		propagate_nonuniform_qualifier(combined->image); // Both halves of the combined image sampler are tagged.
+		propagate_nonuniform_qualifier(combined->sampler);
+	}
+	else if (chain)
+	{
+		for (auto &expr : chain->implied_read_expressions)
+			propagate_nonuniform_qualifier(expr);
+	}
+}
diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp
index 184bbbd..6f59bd8 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp
@@ -103,6 +103,10 @@ public:
 		// Does not apply to shader storage or push constant blocks.
 		bool emit_uniform_buffer_as_plain_uniforms = false;
 
+		// Emit OpLine directives if present in the module.
+		// May not correspond exactly to original source, but should be a good approximation.
+		bool emit_line_directives = false;
+
 		enum Precision
 		{
 			DontCare,
@@ -205,7 +209,7 @@ public:
 	// For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but
 	// mixing int and float is not.
 	// The name of the uniform array will be the same as the interface block name.
-	void flatten_buffer_block(uint32_t id);
+	void flatten_buffer_block(VariableID id);
 
 protected:
 	void reset();
@@ -219,6 +223,7 @@ protected:
 
 	SPIRBlock *current_emitting_block = nullptr;
 	SPIRBlock *current_emitting_switch = nullptr;
+	bool current_emitting_switch_fallthrough = false;
 
 	virtual void emit_instruction(const Instruction &instr);
 	void emit_block_instructions(SPIRBlock &block);
@@ -233,35 +238,40 @@ protected:
 	virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                                        uint32_t count);
 	virtual void emit_header();
+	void emit_line_directive(uint32_t file_id, uint32_t line_literal);
 	void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x,
 	                          const SpecializationConstant &y, const SpecializationConstant &z);
 
 	virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id);
 	virtual void emit_texture_op(const Instruction &i);
+	virtual std::string to_texture_op(const Instruction &i, bool *forward,
+	                                  SmallVector<uint32_t> &inherited_expressions);
 	virtual void emit_subgroup_op(const Instruction &i);
 	virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0);
 	virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage);
 	virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
 	                                const std::string &qualifier = "", uint32_t base_offset = 0);
+	virtual void emit_struct_padding_target(const SPIRType &type);
 	virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0);
 	std::string constant_expression(const SPIRConstant &c);
 	std::string constant_op_expression(const SPIRConstantOp &cop);
 	virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector);
 	virtual void emit_fixup();
 	virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0);
-	virtual std::string to_func_call_arg(uint32_t id);
-	virtual std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+	virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id);
+	virtual std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
 	                                     bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad,
-	                                     bool has_dref, uint32_t lod);
-	virtual std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+	                                     bool has_dref, uint32_t lod, uint32_t minlod);
+	virtual std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
 	                                     bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
 	                                     uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset,
 	                                     uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample,
-	                                     bool *p_forward);
+	                                     uint32_t minlod, bool *p_forward);
 	virtual void emit_buffer_block(const SPIRVariable &type);
 	virtual void emit_push_constant_block(const SPIRVariable &var);
 	virtual void emit_uniform(const SPIRVariable &var);
-	virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id);
+	virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
+	                                           bool packed_type, bool row_major);
 
 	StringStream<> buffer;
 
@@ -322,6 +332,7 @@ protected:
 
 	void begin_scope();
 	void end_scope();
+	void end_scope(const std::string &trailer);
 	void end_scope_decl();
 	void end_scope_decl(const std::string &decl);
 
@@ -341,8 +352,10 @@ protected:
 
 	virtual bool is_non_native_row_major_matrix(uint32_t id);
 	virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
-	bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
-	virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed);
+	bool member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const;
+	bool member_is_packed_physical_type(const SPIRType &type, uint32_t index) const;
+	virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type,
+	                                             uint32_t physical_type_id, bool is_packed);
 
 	std::unordered_set<std::string> local_variable_names;
 	std::unordered_set<std::string> resource_names;
@@ -363,6 +376,7 @@ protected:
 	struct BackendVariations
 	{
 		std::string discard_literal = "discard";
+		std::string demote_literal = "demote";
 		std::string null_pointer_literal = "";
 		bool float_literal_suffix = false;
 		bool double_literal_suffix = true;
@@ -377,6 +391,7 @@ protected:
 		const char *int16_t_literal_suffix = "s";
 		const char *uint16_t_literal_suffix = "us";
 		const char *nonuniform_qualifier = "nonuniformEXT";
+		const char *boolean_mix_function = "mix";
 		bool swizzle_is_function = false;
 		bool shared_is_implied = false;
 		bool unsized_array_supported = true;
@@ -387,7 +402,6 @@ protected:
 		bool can_declare_arrays_inline = true;
 		bool native_row_major_matrix = true;
 		bool use_constructor_splatting = true;
-		bool boolean_mix_support = true;
 		bool allow_precision_qualifiers = false;
 		bool can_swizzle_scalar = false;
 		bool force_gl_in_out_block = false;
@@ -398,6 +412,8 @@ protected:
 		bool array_is_value_type = true;
 		bool comparison_image_samples_scalar = false;
 		bool native_pointers = false;
+		bool support_small_type_sampling_result = false;
+		bool support_case_fallthrough = true;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -412,24 +428,24 @@ protected:
 	void emit_interface_block(const SPIRVariable &type);
 	void emit_flattened_io_block(const SPIRVariable &var, const char *qual);
 	void emit_block_chain(SPIRBlock &block);
-	void emit_hoisted_temporaries(SmallVector<std::pair<uint32_t, uint32_t>> &temporaries);
+	void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
 	std::string constant_value_macro_name(uint32_t id);
 	void emit_constant(const SPIRConstant &constant);
 	void emit_specialization_constant_op(const SPIRConstantOp &constant);
 	std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block);
 	bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method);
-	void propagate_loop_dominators(const SPIRBlock &block);
 
-	void branch(uint32_t from, uint32_t to);
-	void branch_to_continue(uint32_t from, uint32_t to);
-	void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block);
-	void flush_phi(uint32_t from, uint32_t to);
-	bool flush_phi_required(uint32_t from, uint32_t to);
+	void branch(BlockID from, BlockID to);
+	void branch_to_continue(BlockID from, BlockID to);
+	void branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block);
+	void flush_phi(BlockID from, BlockID to);
 	void flush_variable_declaration(uint32_t id);
 	void flush_undeclared_variables(SPIRBlock &block);
+	void emit_variable_temporary_copies(const SPIRVariable &var);
 
 	bool should_dereference(uint32_t id);
-	bool should_forward(uint32_t id);
+	bool should_forward(uint32_t id) const;
+	bool should_suppress_usage_tracking(uint32_t id) const;
 	void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
 	void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op);
 	bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp);
@@ -445,11 +461,18 @@ protected:
 	                              SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
 	void emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
 	                               const char *op, SPIRType::BaseType input_type);
+	void emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+	                                     uint32_t op2, const char *op, SPIRType::BaseType expected_result_type,
+	                                     SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
+	                                     SPIRType::BaseType input_type2);
+	void emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
+	                             uint32_t op3, const char *op, SPIRType::BaseType offset_count_type);
 
 	void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
 	void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op);
 	void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
-	void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+	void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
+	                             bool negate, SPIRType::BaseType expected_type);
 	void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
 	                         SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
 
@@ -460,7 +483,8 @@ protected:
 	                                  uint32_t false_value);
 
 	void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
-	bool expression_is_forwarded(uint32_t id);
+	bool expression_is_forwarded(uint32_t id) const;
+	bool expression_suppresses_usage_tracking(uint32_t id) const;
 	SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
 	                        bool suppress_usage_tracking = false);
 
@@ -494,8 +518,11 @@ protected:
 	SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id);
 	void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
 	std::string to_expression(uint32_t id, bool register_expression_read = true);
+	std::string to_composite_constructor_expression(uint32_t id);
+	std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type);
 	std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true);
+	std::string to_unpacked_row_major_matrix_expression(uint32_t id);
 	std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_pointer_expression(uint32_t id, bool register_expression_read = true);
@@ -517,15 +544,16 @@ protected:
 	virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
 	virtual std::string to_interpolation_qualifiers(const Bitset &flags);
 	std::string layout_for_variable(const SPIRVariable &variable);
-	std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id);
+	std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id);
 	virtual bool skip_argument(uint32_t id) const;
-	virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id);
+	virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
+	                             spv::StorageClass rhs_storage);
 	virtual void emit_block_hints(const SPIRBlock &block);
 	virtual std::string to_initializer_expression(const SPIRVariable &var);
 
 	bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0,
 	                                uint32_t end_offset = ~(0u));
-	std::string buffer_to_packing_standard(const SPIRType &type, bool enable_std430);
+	std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout);
 
 	uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing);
 	uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
@@ -661,6 +689,11 @@ protected:
 
 	char current_locale_radix_character = '.';
 
+	void fixup_type_alias();
+	void reorder_type_alias();
+
+	void propagate_nonuniform_qualifier(uint32_t id);
+
 private:
 	void init();
 };
diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp
index 46613c5..4d4e276 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp
@@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
 	}
 }
 
-string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
+string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
 {
 	auto &imagetype = get<SPIRType>(type.image.type);
 	const char *dim = nullptr;
@@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
 		if (type.image.sampled == 1)
 			return join("Buffer<", type_to_glsl(imagetype), components, ">");
 		else if (type.image.sampled == 2)
+		{
+			if (interlocked_resources.count(id))
+				return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
+				            ">");
 			return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">");
+		}
 		else
 			SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
 	case DimSubpassData:
@@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
 	const char *arrayed = type.image.arrayed ? "Array" : "";
 	const char *ms = type.image.ms ? "MS" : "";
 	const char *rw = typed_load ? "RW" : "";
+	if (typed_load && interlocked_resources.count(id))
+		rw = "RasterizerOrdered";
 	return join(rw, "Texture", dim, ms, arrayed, "<",
 	            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
 	                         join(type_to_glsl(imagetype), components),
@@ -1038,8 +1045,9 @@ void CompilerHLSL::emit_specialization_constants_and_structs()
 {
 	bool emitted = false;
 	SpecializationConstant wg_x, wg_y, wg_z;
-	uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
 
+	auto loop_lock = ir.create_loop_hard_lock();
 	for (auto &id_ : ir.ids_for_constant_or_type)
 	{
 		auto &id = ir.ids[id_];
@@ -1742,6 +1750,46 @@ void CompilerHLSL::emit_resources()
 		end_scope();
 		statement("");
 	}
+
+	if (requires_scalar_reflect)
+	{
+		// FP16/FP64? No templates in HLSL.
+		statement("float SPIRV_Cross_Reflect(float i, float n)");
+		begin_scope();
+		statement("return i - 2.0 * dot(n, i) * n;");
+		end_scope();
+		statement("");
+	}
+
+	if (requires_scalar_refract)
+	{
+		// FP16/FP64? No templates in HLSL.
+		statement("float SPIRV_Cross_Refract(float i, float n, float eta)");
+		begin_scope();
+		statement("float NoI = n * i;");
+		statement("float NoI2 = NoI * NoI;");
+		statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
+		statement("if (k < 0.0)");
+		begin_scope();
+		statement("return 0.0;");
+		end_scope();
+		statement("else");
+		begin_scope();
+		statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+		end_scope();
+		end_scope();
+		statement("");
+	}
+
+	if (requires_scalar_faceforward)
+	{
+		// FP16/FP64? No templates in HLSL.
+		statement("float SPIRV_Cross_FaceForward(float n, float i, float nref)");
+		begin_scope();
+		statement("return i * nref < 0.0 ? n : -n;");
+		end_scope();
+		statement("");
+	}
 }
 
 string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
@@ -1781,7 +1829,7 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
 	string packing_offset;
 	bool is_push_constant = type.storage == StorageClassPushConstant;
 
-	if ((has_extended_decoration(type.self, SPIRVCrossDecorationPacked) || is_push_constant) &&
+	if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) &&
 	    has_member_decoration(type.self, index, DecorationOffset))
 	{
 		uint32_t offset = memb[index].offset - base_offset;
@@ -1807,16 +1855,20 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
 		Bitset flags = ir.get_buffer_block_flags(var);
 		bool is_readonly = flags.get(DecorationNonWritable);
 		bool is_coherent = flags.get(DecorationCoherent);
+		bool is_interlocked = interlocked_resources.count(var.self) > 0;
+		const char *type_name = "ByteAddressBuffer ";
+		if (!is_readonly)
+			type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer ";
 		add_resource_name(var.self);
-		statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ",
-		          to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
+		statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type),
+		          to_resource_binding(var), ";");
 	}
 	else
 	{
 		if (type.array.empty())
 		{
 			if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset))
-				set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+				set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 			else
 				SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset.");
 
@@ -1902,7 +1954,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
 			auto &type = get<SPIRType>(var.basetype);
 
 			if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end))
-				set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+				set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
 			else
 				SPIRV_CROSS_THROW(
 				    "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset.");
@@ -1973,9 +2025,9 @@ void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i
 	}
 }
 
-string CompilerHLSL::to_func_call_arg(uint32_t id)
+string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
 {
-	string arg_str = CompilerGLSL::to_func_call_arg(id);
+	string arg_str = CompilerGLSL::to_func_call_arg(arg, id);
 
 	if (hlsl_options.shader_model <= 30)
 		return arg_str;
@@ -2437,7 +2489,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 
 	uint32_t result_type = ops[0];
 	uint32_t id = ops[1];
-	uint32_t img = ops[2];
+	VariableID img = ops[2];
 	uint32_t coord = ops[3];
 	uint32_t dref = 0;
 	uint32_t comp = 0;
@@ -2449,6 +2501,10 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 
 	inherited_expressions.push_back(coord);
 
+	// Make sure non-uniform decoration is back-propagated to where it needs to be.
+	if (has_decoration(img, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(img);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@@ -2536,6 +2592,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 	uint32_t offset = 0;
 	uint32_t coffsets = 0;
 	uint32_t sample = 0;
+	uint32_t minlod = 0;
 	uint32_t flags = 0;
 
 	if (length)
@@ -2562,10 +2619,14 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 	test(offset, ImageOperandsOffsetMask);
 	test(coffsets, ImageOperandsConstOffsetsMask);
 	test(sample, ImageOperandsSampleMask);
+	test(minlod, ImageOperandsMinLodMask);
 
 	string expr;
 	string texop;
 
+	if (minlod != 0)
+		SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL.");
+
 	if (op == OpImageFetch)
 	{
 		if (hlsl_options.shader_model < 40)
@@ -2831,7 +2892,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 		// according to GLSL spec, and it depends on the sampler itself.
 		// Just assume X == Y, so we will need to splat the result to a float2.
 		statement("float _", id, "_tmp = ", expr, ";");
-		emit_op(result_type, id, join("float2(_", id, "_tmp, _", id, "_tmp)"), true, true);
+		statement("float2 _", id, " = _", id, "_tmp.xx;");
+		set<SPIRExpression>(id, join("_", id), result_type, true);
 	}
 	else
 	{
@@ -2847,7 +2909,6 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 	case OpImageSampleImplicitLod:
 	case OpImageSampleProjImplicitLod:
 	case OpImageSampleProjDrefImplicitLod:
-	case OpImageQueryLod:
 		register_control_dependent_expression(id);
 		break;
 
@@ -3198,8 +3259,11 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL.");
 
 	case GLSLstd450FindILsb:
-		emit_unary_func_op(result_type, id, args[0], "firstbitlow");
+	{
+		auto basetype = expression_type(args[0]).basetype;
+		emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype);
 		break;
+	}
 
 	case GLSLstd450FindSMsb:
 		emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type);
@@ -3240,6 +3304,59 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		break;
 	}
 
+	case GLSLstd450Normalize:
+		// HLSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Returns -1 or 1 for valid input, sign() does the job.
+			emit_unary_func_op(result_type, id, args[0], "sign");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Reflect:
+		if (get<SPIRType>(result_type).vecsize == 1)
+		{
+			if (!requires_scalar_reflect)
+			{
+				requires_scalar_reflect = true;
+				force_recompile();
+			}
+			emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Refract:
+		if (get<SPIRType>(result_type).vecsize == 1)
+		{
+			if (!requires_scalar_refract)
+			{
+				requires_scalar_refract = true;
+				force_recompile();
+			}
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450FaceForward:
+		if (get<SPIRType>(result_type).vecsize == 1)
+		{
+			if (!requires_scalar_faceforward)
+			{
+				requires_scalar_faceforward = true;
+				force_recompile();
+			}
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_FaceForward");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
 	default:
 		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
 		break;
@@ -3384,6 +3501,9 @@ void CompilerHLSL::emit_load(const Instruction &instruction)
 		uint32_t id = ops[1];
 		uint32_t ptr = ops[2];
 
+		if (has_decoration(ptr, DecorationNonUniformEXT))
+			propagate_nonuniform_qualifier(ptr);
+
 		auto load_expr = read_access_chain(*chain);
 
 		bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
@@ -3417,6 +3537,9 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 	// Make sure we trigger a read of the constituents in the access chain.
 	track_expression_read(chain.self);
 
+	if (has_decoration(chain.self, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(chain.self);
+
 	SPIRType target_type;
 	target_type.basetype = SPIRType::UInt;
 	target_type.vecsize = type.vecsize;
@@ -3601,7 +3724,7 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
 		e.row_major_matrix = row_major_matrix;
 		e.matrix_stride = matrix_stride;
 		e.immutable = should_forward(ops[2]);
-		e.loaded_from = backing_variable ? backing_variable->self : 0;
+		e.loaded_from = backing_variable ? backing_variable->self : ID(0);
 
 		if (chain)
 		{
@@ -3909,6 +4032,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	// If we need to do implicit bitcasts, make sure we do it with the correct type.
 	uint32_t integer_width = get_integer_width_for_instruction(instruction);
 	auto int_type = to_signed_basetype(integer_width);
+	auto uint_type = to_unsigned_basetype(integer_width);
 
 	switch (opcode)
 	{
@@ -3933,22 +4057,50 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 	case OpMatrixTimesVector:
 	{
+		// Matrices are kept in a transposed state all the time, flip multiplication order always.
 		emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
 		break;
 	}
 
 	case OpVectorTimesMatrix:
 	{
+		// Matrices are kept in a transposed state all the time, flip multiplication order always.
 		emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
 		break;
 	}
 
 	case OpMatrixTimesMatrix:
 	{
+		// Matrices are kept in a transposed state all the time, flip multiplication order always.
 		emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
 		break;
 	}
 
+	case OpOuterProduct:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2];
+		uint32_t b = ops[3];
+
+		auto &type = get<SPIRType>(result_type);
+		string expr = type_to_glsl_constructor(type);
+		expr += "(";
+		for (uint32_t col = 0; col < type.columns; col++)
+		{
+			expr += to_enclosed_expression(a);
+			expr += " * ";
+			expr += to_extract_component_expression(b, col);
+			if (col + 1 < type.columns)
+				expr += ", ";
+		}
+		expr += ")";
+		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
+		break;
+	}
+
 	case OpFMod:
 	{
 		if (!requires_op_fmod)
@@ -4043,7 +4195,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
 		else
 			HLSL_BOP_CAST(==, int_type);
 		break;
@@ -4051,12 +4203,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 	case OpLogicalEqual:
 	case OpFOrdEqual:
+	case OpFUnordEqual:
 	{
+		// HLSL != operator is unordered.
+		// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
+		// isnan() is apparently implemented as x != x as well.
+		// We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual.
+		// HACK: FUnordEqual will be implemented as FOrdEqual.
+
 		auto result_type = ops[0];
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(==);
 		break;
@@ -4068,7 +4227,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP_CAST(!=, int_type);
 		break;
@@ -4076,12 +4235,23 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 	case OpLogicalNotEqual:
 	case OpFOrdNotEqual:
+	case OpFUnordNotEqual:
 	{
+		// HLSL != operator is unordered.
+		// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
+		// isnan() is apparently implemented as x != x as well.
+
+		// FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here.
+		// We would need to do something like not(UnordEqual), but that cannot be expressed either.
+		// Adding a lot of NaN checks would be a breaking change from perspective of performance.
+		// SPIR-V will generally use isnan() checks when this even matters.
+		// HACK: FOrdNotEqual will be implemented as FUnordNotEqual.
+
 		auto result_type = ops[0];
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(!=);
 		break;
@@ -4092,10 +4262,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	{
 		auto result_type = ops[0];
 		auto id = ops[1];
-		auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type);
 		else
 			HLSL_BOP_CAST(>, type);
 		break;
@@ -4107,21 +4277,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(>);
 		break;
 	}
 
+	case OpFUnordGreaterThan:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpUGreaterThanEqual:
 	case OpSGreaterThanEqual:
 	{
 		auto result_type = ops[0];
 		auto id = ops[1];
 
-		auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type);
 		else
 			HLSL_BOP_CAST(>=, type);
 		break;
@@ -4133,21 +4315,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(>=);
 		break;
 	}
 
+	case OpFUnordGreaterThanEqual:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpULessThan:
 	case OpSLessThan:
 	{
 		auto result_type = ops[0];
 		auto id = ops[1];
 
-		auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThan ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type);
 		else
 			HLSL_BOP_CAST(<, type);
 		break;
@@ -4159,21 +4353,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(<);
 		break;
 	}
 
+	case OpFUnordLessThan:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpULessThanEqual:
 	case OpSLessThanEqual:
 	{
 		auto result_type = ops[0];
 		auto id = ops[1];
 
-		auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type);
 		else
 			HLSL_BOP_CAST(<=, type);
 		break;
@@ -4185,12 +4391,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		auto id = ops[1];
 
 		if (expression_type(ops[2]).vecsize > 1)
-			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=");
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown);
 		else
 			HLSL_BOP(<=);
 		break;
 	}
 
+	case OpFUnordLessThanEqual:
+	{
+		auto result_type = ops[0];
+		auto id = ops[1];
+
+		if (expression_type(ops[2]).vecsize > 1)
+			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown);
+		else
+			CompilerGLSL::emit_instruction(instruction);
+		break;
+	}
+
 	case OpImageQueryLod:
 		emit_texture_op(instruction);
 		break;
@@ -4343,7 +4561,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 		// When using the pointer, we need to know which variable it is actually loaded from.
 		auto *var = maybe_get_backing_variable(ops[2]);
-		e.loaded_from = var ? var->self : 0;
+		e.loaded_from = var ? var->self : ID(0);
 		break;
 	}
 
@@ -4501,8 +4719,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	}
 
 	case OpBitCount:
-		HLSL_UFOP(countbits);
+	{
+		auto basetype = expression_type(ops[2]).basetype;
+		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype);
 		break;
+	}
 
 	case OpBitReverse:
 		HLSL_UFOP(reversebits);
@@ -4527,6 +4748,15 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpIsHelperInvocationEXT:
+		SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL.");
+
+	case OpBeginInvocationInterlockEXT:
+	case OpEndInvocationInterlockEXT:
+		if (hlsl_options.shader_model < 51)
+			SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
+		break; // Nothing to do in the body
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -4601,7 +4831,7 @@ void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &ve
 	remap_vertex_attributes.push_back(vertex_attributes);
 }
 
-uint32_t CompilerHLSL::remap_num_workgroups_builtin()
+VariableID CompilerHLSL::remap_num_workgroups_builtin()
 {
 	update_active_builtins();
 
@@ -4683,23 +4913,28 @@ string CompilerHLSL::compile()
 	backend.uint16_t_literal_suffix = "u";
 	backend.basic_int_type = "int";
 	backend.basic_uint_type = "uint";
+	backend.demote_literal = "discard";
+	backend.boolean_mix_function = "";
 	backend.swizzle_is_function = false;
 	backend.shared_is_implied = true;
 	backend.unsized_array_supported = true;
 	backend.explicit_struct_type = false;
 	backend.use_initializer_list = true;
 	backend.use_constructor_splatting = false;
-	backend.boolean_mix_support = false;
 	backend.can_swizzle_scalar = true;
 	backend.can_declare_struct_inline = false;
 	backend.can_declare_arrays_inline = false;
 	backend.can_return_array = false;
 	backend.nonuniform_qualifier = "NonUniformResourceIndex";
+	backend.support_case_fallthrough = false;
 
+	fixup_type_alias();
+	reorder_type_alias();
 	build_function_control_flow_graphs_and_analyze();
 	validate_shader_model();
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
+	analyze_interlocked_resource_usage();
 
 	// Subpass input needs SV_Position.
 	if (need_subpass_input)
diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp
index d96c911..eb968f0 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp
@@ -114,7 +114,7 @@ public:
 	// If non-zero, this returns the variable ID of a cbuffer which corresponds to
 	// the cbuffer declared above. By default, no binding or descriptor set decoration is set,
 	// so the calling application should declare explicit bindings on this ID before calling compile().
-	uint32_t remap_num_workgroups_builtin();
+	VariableID remap_num_workgroups_builtin();
 
 private:
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
@@ -145,7 +145,7 @@ private:
 	std::string layout_for_member(const SPIRType &type, uint32_t index) override;
 	std::string to_interpolation_qualifiers(const Bitset &flags) override;
 	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
-	std::string to_func_call_arg(uint32_t id) override;
+	std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override;
 	std::string to_sampler_expression(uint32_t id);
 	std::string to_resource_binding(const SPIRVariable &var);
 	std::string to_resource_binding_sampler(const SPIRVariable &var);
@@ -167,6 +167,8 @@ private:
 	void replace_illegal_names() override;
 
 	Options hlsl_options;
+
+	// TODO: Refactor this to be more similar to MSL, maybe have some common system in place?
 	bool requires_op_fmod = false;
 	bool requires_fp16_packing = false;
 	bool requires_explicit_fp16_packing = false;
@@ -179,6 +181,9 @@ private:
 	bool requires_inverse_2x2 = false;
 	bool requires_inverse_3x3 = false;
 	bool requires_inverse_4x4 = false;
+	bool requires_scalar_reflect = false;
+	bool requires_scalar_refract = false;
+	bool requires_scalar_faceforward = false;
 	uint64_t required_textureSizeVariants = 0;
 	void require_texture_query_variant(const SPIRType &type);
 
diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp
index 4a4f77a..d7cb138 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp
@@ -28,8 +28,6 @@ using namespace std;
 static const uint32_t k_unknown_location = ~0u;
 static const uint32_t k_unknown_component = ~0u;
 
-static const uint32_t k_aux_mbr_idx_swizzle_const = 0u;
-
 CompilerMSL::CompilerMSL(std::vector<uint32_t> spirv_)
     : CompilerGLSL(move(spirv_))
 {
@@ -59,7 +57,14 @@ void CompilerMSL::add_msl_vertex_attribute(const MSLVertexAttr &va)
 
 void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding)
 {
-	resource_bindings.push_back({ binding, false });
+	StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
+	resource_bindings[tuple] = { binding, false };
+}
+
+void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index)
+{
+	SetBindingPair pair = { desc_set, binding };
+	buffers_requiring_dynamic_offset[pair] = { index, 0 };
 }
 
 void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
@@ -68,6 +73,17 @@ void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
 		argument_buffer_discrete_mask |= 1u << desc_set;
 }
 
+void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage)
+{
+	if (desc_set < kMaxArgumentBuffers)
+	{
+		if (device_storage)
+			argument_buffer_device_storage_mask |= 1u << desc_set;
+		else
+			argument_buffer_device_storage_mask &= ~(1u << desc_set);
+	}
+}
+
 bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location)
 {
 	return vtx_attrs_in_use.count(location) != 0;
@@ -75,12 +91,29 @@ bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location)
 
 bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding)
 {
-	auto itr = find_if(begin(resource_bindings), end(resource_bindings),
-	                   [&](const std::pair<MSLResourceBinding, bool> &resource) -> bool {
-		                   return model == resource.first.stage && desc_set == resource.first.desc_set &&
-		                          binding == resource.first.binding;
-	                   });
-	return itr != end(resource_bindings) && itr->second;
+	StageSetBinding tuple = { model, desc_set, binding }; // Same key layout that add_msl_resource_binding() stores under.
+	auto itr = resource_bindings.find(tuple);
+	return itr != end(resource_bindings) && itr->second.second; // False if nothing is registered for the key; otherwise the bool stored with the binding.
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const
+{ // Returns the SPIRVCrossDecorationResourceIndexPrimary extended decoration recorded for id. NOTE(review): the value for an id without that decoration is decided by get_extended_decoration(), not shown here.
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) const
+{ // Returns the SPIRVCrossDecorationResourceIndexSecondary extended decoration recorded for id.
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const
+{ // Returns the SPIRVCrossDecorationResourceIndexTertiary extended decoration recorded for id.
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const
+{ // Returns the SPIRVCrossDecorationResourceIndexQuaternary extended decoration recorded for id.
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary);
 }
 
 void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
@@ -93,7 +126,19 @@ void CompilerMSL::build_implicit_builtins()
 	bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition);
 	bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex;
 	bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl;
-	if (need_subpass_input || need_sample_pos || need_vertex_params || need_tesc_params)
+	bool need_subgroup_mask =
+	    active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) ||
+	    active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) ||
+	    active_input_builtins.get(BuiltInSubgroupLtMask);
+	bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) ||
+	                                                       active_input_builtins.get(BuiltInSubgroupGtMask));
+	bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index &&
+	                      (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex));
+	bool need_dispatch_base =
+	    msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute &&
+	    (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId));
+	if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
+	    need_multiview || need_dispatch_base || needs_subgroup_invocation_id)
 	{
 		bool has_frag_coord = false;
 		bool has_sample_id = false;
@@ -103,18 +148,23 @@ void CompilerMSL::build_implicit_builtins()
 		bool has_base_instance = false;
 		bool has_invocation_id = false;
 		bool has_primitive_id = false;
+		bool has_subgroup_invocation_id = false;
+		bool has_subgroup_size = false;
+		bool has_view_idx = false;
+		uint32_t workgroup_id_type = 0;
 
 		ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
 			if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin)
 				return;
 
-			if (need_subpass_input && ir.meta[var.self].decoration.builtin_type == BuiltInFragCoord)
+			BuiltIn builtin = ir.meta[var.self].decoration.builtin_type;
+			if (need_subpass_input && builtin == BuiltInFragCoord)
 			{
 				builtin_frag_coord_id = var.self;
 				has_frag_coord = true;
 			}
 
-			if (need_sample_pos && ir.meta[var.self].decoration.builtin_type == BuiltInSampleId)
+			if (need_sample_pos && builtin == BuiltInSampleId)
 			{
 				builtin_sample_id_id = var.self;
 				has_sample_id = true;
@@ -122,7 +172,7 @@ void CompilerMSL::build_implicit_builtins()
 
 			if (need_vertex_params)
 			{
-				switch (ir.meta[var.self].decoration.builtin_type)
+				switch (builtin)
 				{
 				case BuiltInVertexIndex:
 					builtin_vertex_idx_id = var.self;
@@ -147,7 +197,7 @@ void CompilerMSL::build_implicit_builtins()
 
 			if (need_tesc_params)
 			{
-				switch (ir.meta[var.self].decoration.builtin_type)
+				switch (builtin)
 				{
 				case BuiltInInvocationId:
 					builtin_invocation_id_id = var.self;
@@ -161,6 +211,41 @@ void CompilerMSL::build_implicit_builtins()
 					break;
 				}
 			}
+
+			if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId)
+			{
+				builtin_subgroup_invocation_id_id = var.self;
+				has_subgroup_invocation_id = true;
+			}
+
+			if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize)
+			{
+				builtin_subgroup_size_id = var.self;
+				has_subgroup_size = true;
+			}
+
+			if (need_multiview)
+			{
+				if (builtin == BuiltInInstanceIndex)
+				{
+					// The view index here is derived from the instance index.
+					builtin_instance_idx_id = var.self;
+					has_instance_idx = true;
+				}
+
+				if (builtin == BuiltInViewIndex)
+				{
+					builtin_view_idx_id = var.self;
+					has_view_idx = true;
+				}
+			}
+
+			// The base workgroup needs to have the same type and vector size
+			// as the workgroup or invocation ID, so keep track of the type that
+			// was used.
+			if (need_dispatch_base && workgroup_id_type == 0 &&
+			    (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId))
+				workgroup_id_type = var.basetype;
 		});
 
 		if (!has_frag_coord && need_subpass_input)
@@ -188,6 +273,7 @@ void CompilerMSL::build_implicit_builtins()
 			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 			set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord);
 			builtin_frag_coord_id = var_id;
+			mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id);
 		}
 
 		if (!has_sample_id && need_sample_pos)
@@ -214,9 +300,11 @@ void CompilerMSL::build_implicit_builtins()
 			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 			set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId);
 			builtin_sample_id_id = var_id;
+			mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id);
 		}
 
-		if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance))
+		if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) ||
+		    (need_multiview && (!has_instance_idx || !has_view_idx)))
 		{
 			uint32_t offset = ir.increase_bound_by(2);
 			uint32_t type_id = offset;
@@ -235,7 +323,7 @@ void CompilerMSL::build_implicit_builtins()
 			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
 			ptr_type.self = type_id;
 
-			if (!has_vertex_idx)
+			if (need_vertex_params && !has_vertex_idx)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -243,8 +331,10 @@ void CompilerMSL::build_implicit_builtins()
 				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 				set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex);
 				builtin_vertex_idx_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id);
 			}
-			if (!has_base_vertex)
+
+			if (need_vertex_params && !has_base_vertex)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -252,8 +342,10 @@ void CompilerMSL::build_implicit_builtins()
 				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 				set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex);
 				builtin_base_vertex_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id);
 			}
-			if (!has_instance_idx)
+
+			if (!has_instance_idx) // Needed by both multiview and tessellation
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -261,8 +353,10 @@ void CompilerMSL::build_implicit_builtins()
 				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 				set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
 				builtin_instance_idx_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id);
 			}
-			if (!has_base_instance)
+
+			if (need_vertex_params && !has_base_instance)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -270,6 +364,39 @@ void CompilerMSL::build_implicit_builtins()
 				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 				set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance);
 				builtin_base_instance_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
+			}
+
+			if (need_multiview)
+			{
+				// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
+				// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
+				// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
+				// gl_Layer is an output in vertex-pipeline shaders.
+				uint32_t type_ptr_out_id = ir.increase_bound_by(2);
+				SPIRType uint_type_ptr_out;
+				uint_type_ptr_out = uint_type;
+				uint_type_ptr_out.pointer = true;
+				uint_type_ptr_out.parent_type = type_id;
+				uint_type_ptr_out.storage = StorageClassOutput;
+				auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
+				ptr_out_type.self = type_id;
+				uint32_t var_id = type_ptr_out_id + 1;
+				set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
+				set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
+				builtin_layer_id = var_id;
+				mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
+			}
+
+			if (need_multiview && !has_view_idx)
+			{
+				uint32_t var_id = ir.increase_bound_by(1);
+
+				// Create gl_ViewIndex.
+				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+				set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
+				builtin_view_idx_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
 			}
 		}
 
@@ -300,7 +427,9 @@ void CompilerMSL::build_implicit_builtins()
 				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 				set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId);
 				builtin_invocation_id_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id);
 			}
+
 			if (!has_primitive_id)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
@@ -309,56 +438,198 @@ void CompilerMSL::build_implicit_builtins()
 				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
 				set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId);
 				builtin_primitive_id_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id);
+			}
+		}
+
+		if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id))
+		{
+			uint32_t offset = ir.increase_bound_by(3);
+			uint32_t type_id = offset;
+			uint32_t type_ptr_id = offset + 1;
+			uint32_t var_id = offset + 2;
+
+			// Create gl_SubgroupInvocationID.
+			SPIRType uint_type;
+			uint_type.basetype = SPIRType::UInt;
+			uint_type.width = 32;
+			set<SPIRType>(type_id, uint_type);
+
+			SPIRType uint_type_ptr;
+			uint_type_ptr = uint_type;
+			uint_type_ptr.pointer = true;
+			uint_type_ptr.parent_type = type_id;
+			uint_type_ptr.storage = StorageClassInput;
+			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
+			ptr_type.self = type_id;
+
+			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+			set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId);
+			builtin_subgroup_invocation_id_id = var_id;
+			mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id);
+		}
+
+		if (!has_subgroup_size && need_subgroup_ge_mask)
+		{
+			uint32_t offset = ir.increase_bound_by(3);
+			uint32_t type_id = offset;
+			uint32_t type_ptr_id = offset + 1;
+			uint32_t var_id = offset + 2;
+
+			// Create gl_SubgroupSize.
+			SPIRType uint_type;
+			uint_type.basetype = SPIRType::UInt;
+			uint_type.width = 32;
+			set<SPIRType>(type_id, uint_type);
+
+			SPIRType uint_type_ptr;
+			uint_type_ptr = uint_type;
+			uint_type_ptr.pointer = true;
+			uint_type_ptr.parent_type = type_id;
+			uint_type_ptr.storage = StorageClassInput;
+			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
+			ptr_type.self = type_id;
+
+			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+			set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize);
+			builtin_subgroup_size_id = var_id;
+			mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id);
+		}
+
+		if (need_dispatch_base)
+		{
+			uint32_t var_id;
+			if (msl_options.supports_msl_version(1, 2))
+			{
+				// If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin
+				// to convey this information and save a buffer slot.
+				uint32_t offset = ir.increase_bound_by(1);
+				var_id = offset;
+
+				set<SPIRVariable>(var_id, workgroup_id_type, StorageClassInput);
+				set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase);
+				get_entry_point().interface_variables.push_back(var_id);
+			}
+			else
+			{
+				// Otherwise, we need to fall back to a good ol' fashioned buffer.
+				uint32_t offset = ir.increase_bound_by(2);
+				var_id = offset;
+				uint32_t type_id = offset + 1;
+
+				SPIRType var_type = get<SPIRType>(workgroup_id_type);
+				var_type.storage = StorageClassUniform;
+				set<SPIRType>(type_id, var_type);
+
+				set<SPIRVariable>(var_id, type_id, StorageClassUniform);
+				// This should never match anything.
+				set_decoration(var_id, DecorationDescriptorSet, ~(5u));
+				set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index);
+				set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
+				                        msl_options.indirect_params_buffer_index);
 			}
+			set_name(var_id, "spvDispatchBase");
+			builtin_dispatch_base_id = var_id;
 		}
 	}
 
-	if (needs_aux_buffer_def)
-	{
-		uint32_t offset = ir.increase_bound_by(5);
-		uint32_t type_id = offset;
-		uint32_t type_arr_id = offset + 1;
-		uint32_t struct_id = offset + 2;
-		uint32_t struct_ptr_id = offset + 3;
-		uint32_t var_id = offset + 4;
-
-		// Create a buffer to hold extra data, including the swizzle constants.
-		SPIRType uint_type;
-		uint_type.basetype = SPIRType::UInt;
-		uint_type.width = 32;
-		set<SPIRType>(type_id, uint_type);
-
-		SPIRType uint_type_arr = uint_type;
-		uint_type_arr.array.push_back(0);
-		uint_type_arr.array_size_literal.push_back(true);
-		uint_type_arr.parent_type = type_id;
-		set<SPIRType>(type_arr_id, uint_type_arr);
-		set_decoration(type_arr_id, DecorationArrayStride, 4);
-
-		SPIRType struct_type;
-		struct_type.basetype = SPIRType::Struct;
-		struct_type.member_types.push_back(type_arr_id);
-		auto &type = set<SPIRType>(struct_id, struct_type);
-		type.self = struct_id;
-		set_decoration(struct_id, DecorationBlock);
-		set_name(struct_id, "spvAux");
-		set_member_name(struct_id, k_aux_mbr_idx_swizzle_const, "swizzleConst");
-		set_member_decoration(struct_id, k_aux_mbr_idx_swizzle_const, DecorationOffset, 0);
-
-		SPIRType struct_type_ptr = struct_type;
-		struct_type_ptr.pointer = true;
-		struct_type_ptr.parent_type = struct_id;
-		struct_type_ptr.storage = StorageClassUniform;
-		auto &ptr_type = set<SPIRType>(struct_ptr_id, struct_type_ptr);
-		ptr_type.self = struct_id;
-
-		set<SPIRVariable>(var_id, struct_ptr_id, StorageClassUniform);
-		set_name(var_id, "spvAuxBuffer");
+	if (needs_swizzle_buffer_def)
+	{
+		uint32_t var_id = build_constant_uint_array_pointer();
+		set_name(var_id, "spvSwizzleConstants");
+		// This should never match anything.
+		set_decoration(var_id, DecorationDescriptorSet, kSwizzleBufferBinding);
+		set_decoration(var_id, DecorationBinding, msl_options.swizzle_buffer_index);
+		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.swizzle_buffer_index);
+		swizzle_buffer_id = var_id;
+	}
+
+	if (!buffers_requiring_array_length.empty())
+	{
+		uint32_t var_id = build_constant_uint_array_pointer();
+		set_name(var_id, "spvBufferSizeConstants");
+		// This should never match anything.
+		set_decoration(var_id, DecorationDescriptorSet, kBufferSizeBufferBinding);
+		set_decoration(var_id, DecorationBinding, msl_options.buffer_size_buffer_index);
+		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index);
+		buffer_size_buffer_id = var_id;
+	}
+
+	if (needs_view_mask_buffer())
+	{
+		uint32_t var_id = build_constant_uint_array_pointer();
+		set_name(var_id, "spvViewMask");
+		// This should never match anything.
+		set_decoration(var_id, DecorationDescriptorSet, ~(4u));
+		set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index);
+		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
+		view_mask_buffer_id = var_id;
+	}
+
+	if (!buffers_requiring_dynamic_offset.empty())
+	{
+		uint32_t var_id = build_constant_uint_array_pointer();
+		set_name(var_id, "spvDynamicOffsets");
 		// This should never match anything.
-		set_decoration(var_id, DecorationDescriptorSet, 0xFFFFFFFE);
-		set_decoration(var_id, DecorationBinding, msl_options.aux_buffer_index);
-		aux_buffer_id = var_id;
+		set_decoration(var_id, DecorationDescriptorSet, ~(5u));
+		set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index);
+		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
+		                        msl_options.dynamic_offsets_buffer_index);
+		dynamic_offsets_buffer_id = var_id;
+	}
+}
+
+void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
+{ // Flags builtin as active for the given storage class and appends id to the entry point's interface variables.
+	Bitset *active_builtins = nullptr;
+	switch (storage) // Choose the bitset that matches the storage class.
+	{
+	case StorageClassInput:
+		active_builtins = &active_input_builtins;
+		break;
+
+	case StorageClassOutput:
+		active_builtins = &active_output_builtins;
+		break;
+
+	default: // Any other storage class leaves the pointer null.
+		break;
+	}
+
+	assert(active_builtins != nullptr); // NOTE(review): compiled out under NDEBUG, after which the next line would dereference null; every call visible in this patch passes Input or Output.
+	active_builtins->set(builtin);
+	get_entry_point().interface_variables.push_back(id);
+}
+
+uint32_t CompilerMSL::build_constant_uint_array_pointer()
+{ // Creates a uint type, a pointer to it, a pointer to that pointer, and a variable of the last type; returns the variable's ID.
+	uint32_t offset = ir.increase_bound_by(4); // Reserve four IDs, used as offset .. offset + 3 below.
+	uint32_t type_id = offset;
+	uint32_t type_ptr_id = offset + 1;
+	uint32_t type_ptr_ptr_id = offset + 2;
+	uint32_t var_id = offset + 3;
+
+	// Element type: 32-bit unsigned int. build_implicit_builtins() uses the resulting variable for swizzle constants, buffer sizes, the view mask and dynamic offsets.
+	SPIRType uint_type;
+	uint_type.basetype = SPIRType::UInt;
+	uint_type.width = 32;
+	set<SPIRType>(type_id, uint_type);
+
+	SPIRType uint_type_pointer = uint_type;
+	uint_type_pointer.pointer = true;
+	uint_type_pointer.pointer_depth = 1;
+	uint_type_pointer.parent_type = type_id;
+	uint_type_pointer.storage = StorageClassUniform;
+	set<SPIRType>(type_ptr_id, uint_type_pointer);
+	set_decoration(type_ptr_id, DecorationArrayStride, 4); // Stride of 4 matches the 32-bit element width.
+
+	SPIRType uint_type_pointer2 = uint_type_pointer;
+	uint_type_pointer2.pointer_depth++; // Depth 2: pointer to the pointer type above.
+	uint_type_pointer2.parent_type = type_ptr_id;
+	set<SPIRType>(type_ptr_ptr_id, uint_type_pointer2);
+
+	set<SPIRVariable>(var_id, type_ptr_ptr_id, StorageClassUniformConstant); // Name and decorations are left to the caller.
+	return var_id;
 }
 
 static string create_sampler_address(const char *prefix, MSLSamplerAddress addr)
@@ -416,7 +687,7 @@ void CompilerMSL::emit_entry_point_declarations()
 	// FIXME: Get test coverage here ...
 
 	// Emit constexpr samplers here.
-	for (auto &samp : constexpr_samplers)
+	for (auto &samp : constexpr_samplers_by_id)
 	{
 		auto &var = get<SPIRVariable>(samp.first);
 		auto &type = get<SPIRType>(var.basetype);
@@ -532,9 +803,82 @@ void CompilerMSL::emit_entry_point_declarations()
 			                    convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")"));
 		}
 
-		statement("constexpr sampler ",
-		          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
-		          "(", merge(args), ");");
+		// If we would emit no arguments, then omit the parentheses entirely. Otherwise,
+		// we'll wind up with a "most vexing parse" situation.
+		if (args.empty())
+			statement("constexpr sampler ",
+			          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
+			          ";");
+		else
+			statement("constexpr sampler ",
+			          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
+			          "(", merge(args), ");");
+	}
+
+	// Emit dynamic buffers here.
+	for (auto &dynamic_buffer : buffers_requiring_dynamic_offset)
+	{
+		if (!dynamic_buffer.second.second)
+		{
+			// Could happen if no buffer was used at requested binding point.
+			continue;
+		}
+
+		const auto &var = get<SPIRVariable>(dynamic_buffer.second.second);
+		uint32_t var_id = var.self;
+		const auto &type = get_variable_data_type(var);
+		string name = to_name(var.self);
+		uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet);
+		uint32_t arg_id = argument_buffer_ids[desc_set];
+		uint32_t base_index = dynamic_buffer.second.first;
+
+		if (!type.array.empty())
+		{
+			// This is complicated, because we need to support arrays of arrays.
+			// And it's even worse if the outermost dimension is a runtime array, because now
+			// all this complicated goop has to go into the shader itself. (FIXME)
+			if (!type.array[type.array.size() - 1])
+				SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet.");
+			else
+			{
+				statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name,
+				          type_to_array_glsl(type), " =");
+				uint32_t dim = uint32_t(type.array.size());
+				uint32_t j = 0;
+				for (SmallVector<uint32_t> indices(type.array.size());
+				     indices[type.array.size() - 1] < to_array_size_literal(type); j++)
+				{
+					while (dim > 0)
+					{
+						begin_scope();
+						--dim;
+					}
+
+					string arrays;
+					for (uint32_t i = uint32_t(type.array.size()); i; --i)
+						arrays += join("[", indices[i - 1], "]");
+					statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ",
+					          to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ",
+					          to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"),
+					          arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),");
+
+					while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1)
+					{
+						end_scope(",");
+						indices[dim++] = 0;
+					}
+				}
+				end_scope_decl();
+				statement_no_indent("");
+			}
+		}
+		else
+		{
+			statement(get_argument_address_space(var), " auto& ", to_restrict(var_id), name, " = *(",
+			          get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((",
+			          get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".",
+			          ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);");
+		}
 	}
 
 	// Emit buffer arrays here.
@@ -543,10 +887,10 @@ void CompilerMSL::emit_entry_point_declarations()
 		const auto &var = get<SPIRVariable>(array_id);
 		const auto &type = get_variable_data_type(var);
 		string name = to_name(array_id);
-		statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] =");
+		statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] =");
 		begin_scope();
-		for (uint32_t i = 0; i < type.array[0]; ++i)
-			statement(name + "_" + convert_to_string(i) + ",");
+		for (uint32_t i = 0; i < to_array_size_literal(type); ++i)
+			statement(name, "_", i, ",");
 		end_scope_decl();
 		statement_no_indent("");
 	}
@@ -564,7 +908,7 @@ string CompilerMSL::compile()
 	backend.float_literal_suffix = false;
 	backend.uint32_t_literal_suffix = true;
 	backend.int16_t_literal_suffix = "";
-	backend.uint16_t_literal_suffix = "u";
+	backend.uint16_t_literal_suffix = "";
 	backend.basic_int_type = "int";
 	backend.basic_uint_type = "uint";
 	backend.basic_int8_type = "char";
@@ -572,6 +916,8 @@ string CompilerMSL::compile()
 	backend.basic_int16_type = "short";
 	backend.basic_uint16_type = "ushort";
 	backend.discard_literal = "discard_fragment()";
+	backend.demote_literal = "unsupported-demote";
+	backend.boolean_mix_function = "select";
 	backend.swizzle_is_function = false;
 	backend.shared_is_implied = false;
 	backend.use_initializer_list = true;
@@ -580,34 +926,46 @@ string CompilerMSL::compile()
 	backend.unsized_array_supported = false;
 	backend.can_declare_arrays_inline = false;
 	backend.can_return_array = false;
-	backend.boolean_mix_support = false;
 	backend.allow_truncated_access_chain = true;
 	backend.array_is_value_type = false;
 	backend.comparison_image_samples_scalar = true;
 	backend.native_pointers = true;
 	backend.nonuniform_qualifier = "";
+	backend.support_small_type_sampling_result = true;
 
 	capture_output_to_buffer = msl_options.capture_output_to_buffer;
 	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
 
-	replace_illegal_names();
+	// Initialize array here rather than constructor, MSVC 2013 workaround.
+	for (auto &id : next_metal_resource_ids)
+		id = 0;
 
-	struct_member_padding.clear();
+	fixup_type_alias();
+	replace_illegal_names();
 
 	build_function_control_flow_graphs_and_analyze();
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
 	analyze_sampled_image_usage();
+	analyze_interlocked_resource_usage();
+	preprocess_op_codes();
 	build_implicit_builtins();
 
 	fixup_image_load_store_access();
 
 	set_enabled_interface_variables(get_active_interface_variables());
-	if (aux_buffer_id)
-		active_interface_variables.insert(aux_buffer_id);
-
-	// Preprocess OpCodes to extract the need to output additional header content
-	preprocess_op_codes();
+	if (swizzle_buffer_id)
+		active_interface_variables.insert(swizzle_buffer_id);
+	if (buffer_size_buffer_id)
+		active_interface_variables.insert(buffer_size_buffer_id);
+	if (view_mask_buffer_id)
+		active_interface_variables.insert(view_mask_buffer_id);
+	if (dynamic_offsets_buffer_id)
+		active_interface_variables.insert(dynamic_offsets_buffer_id);
+	if (builtin_layer_id)
+		active_interface_variables.insert(builtin_layer_id);
+	if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2))
+		active_interface_variables.insert(builtin_dispatch_base_id);
 
 	// Create structs to hold input, output and uniform variables.
 	// Do output first to ensure out. is declared at top of entry function.
@@ -633,6 +991,7 @@ string CompilerMSL::compile()
 
 	// Mark any non-stage-in structs to be tightly packed.
 	mark_packable_structs();
+	reorder_type_alias();
 
 	// Add fixup hooks required by shader inputs and outputs. This needs to happen before
 	// the loop, so the hooks aren't added multiple times.
@@ -659,6 +1018,8 @@ string CompilerMSL::compile()
 		next_metal_resource_index_buffer = 0;
 		next_metal_resource_index_texture = 0;
 		next_metal_resource_index_sampler = 0;
+		for (auto &id : next_metal_resource_ids)
+			id = 0;
 
 		// Move constructor for this type is broken on GCC 4.9 ...
 		buffer.reset();
@@ -700,6 +1061,9 @@ void CompilerMSL::preprocess_op_codes()
 		is_rasterization_disabled = true;
 		capture_output_to_buffer = true;
 	}
+
+	if (preproc.needs_subgroup_invocation_id)
+		needs_subgroup_invocation_id = true;
 }
 
 // Move the Private and Workgroup global variables to the entry function.
@@ -783,6 +1147,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 			case OpInBoundsAccessChain:
 			case OpAccessChain:
 			case OpPtrAccessChain:
+			case OpArrayLength:
 			{
 				uint32_t base_id = ops[2];
 				if (global_var_ids.find(base_id) != global_var_ids.end())
@@ -890,7 +1255,6 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 					added_out = true;
 				}
 				type_id = get<SPIRVariable>(arg_id).basetype;
-				p_type = &get<SPIRType>(type_id);
 				uint32_t next_id = ir.increase_bound_by(1);
 				func.add_parameter(type_id, next_id, true);
 				set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
@@ -962,7 +1326,7 @@ void CompilerMSL::mark_packable_structs()
 }
 
 // If the specified type is a struct, it and any nested structs
-// are marked as packable with the SPIRVCrossDecorationPacked decoration,
+// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration.
 void CompilerMSL::mark_as_packable(SPIRType &type)
 {
 	// If this is not the base type (eg. it's a pointer or array), tunnel down
@@ -974,10 +1338,10 @@ void CompilerMSL::mark_as_packable(SPIRType &type)
 
 	if (type.basetype == SPIRType::Struct)
 	{
-		set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+		set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked);
 
 		// Recurse
-		size_t mbr_cnt = type.member_types.size();
+		uint32_t mbr_cnt = uint32_t(type.member_types.size());
 		for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
 		{
 			uint32_t mbr_type_id = type.member_types[mbr_idx];
@@ -1080,7 +1444,7 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
 	else if (!strip_array)
 		ir.meta[var.self].decoration.qualified_alias = qual_var_name;
 
-	if (var.storage == StorageClassOutput && var.initializer != 0)
+	if (var.storage == StorageClassOutput && var.initializer != ID(0))
 	{
 		entry_func.fixup_hooks_in.push_back(
 		    [=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); });
@@ -1768,8 +2132,7 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t
 	bool in_array = false;
 	for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++)
 	{
-		auto &mbr_dec = ir.meta[ib_type_id].members[i];
-		uint32_t var_id = mbr_dec.extended.ib_orig_id;
+		uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID);
 		if (!var_id)
 			continue;
 		auto &var = get<SPIRVariable>(var_id);
@@ -1823,21 +2186,50 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t
 // Returns the ID of the newly added variable, or zero if no variable was added.
 uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
 {
-	// Accumulate the variables that should appear in the interface struct
+	// Accumulate the variables that should appear in the interface struct.
 	SmallVector<SPIRVariable *> vars;
-	bool incl_builtins = (storage == StorageClassOutput || is_tessellation_shader());
+	bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader();
+	bool has_seen_barycentric = false;
 
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
+		if (var.storage != storage)
+			return;
+
 		auto &type = this->get<SPIRType>(var.basetype);
-		BuiltIn bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
-		if (var.storage == storage && interface_variable_exists_in_entry_point(var.self) &&
-		    !is_hidden_variable(var, incl_builtins) && type.pointer &&
-		    (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch &&
-		    (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
-		     bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || bi_type == BuiltInLayer ||
-		     bi_type == BuiltInViewportIndex || bi_type == BuiltInFragDepth || bi_type == BuiltInSampleMask ||
-		     (get_execution_model() == ExecutionModelTessellationEvaluation &&
-		      (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner))))
+
+		bool is_builtin = is_builtin_variable(var);
+		auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
+
+		// These builtins are part of the stage in/out structs.
+		bool is_interface_block_builtin =
+		    (bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance ||
+		     bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex ||
+		     bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || bi_type == BuiltInFragDepth ||
+		     bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask) ||
+		    (get_execution_model() == ExecutionModelTessellationEvaluation &&
+		     (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner));
+
+		bool is_active = interface_variable_exists_in_entry_point(var.self);
+		if (is_builtin && is_active)
+		{
+			// Only emit the builtin if it's active in this entry point. Interface variable list might lie.
+			is_active = has_active_builtin(bi_type, storage);
+		}
+
+		bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch;
+
+		bool hidden = is_hidden_variable(var, incl_builtins);
+		// Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments.
+		if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV))
+		{
+			if (has_seen_barycentric)
+				SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL.");
+			has_seen_barycentric = true;
+			hidden = false;
+		}
+
+		if (is_active && !hidden && type.pointer && filter_patch_decoration &&
+		    (!is_builtin || is_interface_block_builtin))
 		{
 			vars.push_back(&var);
 		}
@@ -1957,7 +2349,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
 	set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref);
 	set_name(ib_var_id, ib_var_ref);
 
-	for (auto p_var : vars)
+	for (auto *p_var : vars)
 	{
 		bool strip_array =
 		    (get_execution_model() == ExecutionModelTessellationControl ||
@@ -2055,7 +2447,8 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil
 	auto &type = get<SPIRType>(type_id);
 
 	if ((builtin == BuiltInSampleMask && is_array(type)) ||
-	    ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex) && type.basetype != SPIRType::UInt))
+	    ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex || builtin == BuiltInFragStencilRefEXT) &&
+	     type.basetype != SPIRType::UInt))
 	{
 		uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1);
 		uint32_t base_type_id = next_id++;
@@ -2163,188 +2556,621 @@ uint32_t CompilerMSL::ensure_correct_attribute_type(uint32_t type_id, uint32_t l
 	return type_id;
 }
 
+void CompilerMSL::mark_struct_members_packed(const SPIRType &type)
+{
+	set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked);
+
+	// Problem case! Struct needs to be placed at an awkward alignment.
+	// Mark every member of the child struct as packed.
+	uint32_t mbr_cnt = uint32_t(type.member_types.size());
+	for (uint32_t i = 0; i < mbr_cnt; i++)
+	{
+		auto &mbr_type = get<SPIRType>(type.member_types[i]);
+		if (mbr_type.basetype == SPIRType::Struct)
+		{
+			// Recursively mark structs as packed.
+			auto *struct_type = &mbr_type;
+			while (!struct_type->array.empty())
+				struct_type = &get<SPIRType>(struct_type->parent_type);
+			mark_struct_members_packed(*struct_type);
+		}
+		else if (!is_scalar(mbr_type))
+			set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked);
+	}
+}
+
+void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type)
+{
+	uint32_t mbr_cnt = uint32_t(type.member_types.size());
+	for (uint32_t i = 0; i < mbr_cnt; i++)
+	{
+		auto &mbr_type = get<SPIRType>(type.member_types[i]);
+		if (mbr_type.basetype == SPIRType::Struct)
+		{
+			auto *struct_type = &mbr_type;
+			while (!struct_type->array.empty())
+				struct_type = &get<SPIRType>(struct_type->parent_type);
+
+			if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked))
+				continue;
+
+			uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i);
+			uint32_t msl_size = get_declared_struct_member_size_msl(type, i);
+			uint32_t spirv_offset = type_struct_member_offset(type, i);
+			uint32_t spirv_offset_next;
+			if (i + 1 < mbr_cnt)
+				spirv_offset_next = type_struct_member_offset(type, i + 1);
+			else
+				spirv_offset_next = spirv_offset + msl_size;
+
+			// Misalignment and overlap with the next member are both complicated cases. In scalar layout,
+			// a struct of float3 might just consume 12 bytes, and the next member will be placed at offset 12.
+			bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0;
+			bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next;
+			uint32_t array_stride = 0;
+			bool struct_needs_explicit_padding = false;
+
+			// If the struct is used as an array element, verify that ArrayStride matches the effective size of the struct.
+			if (!mbr_type.array.empty())
+			{
+				array_stride = type_struct_member_array_stride(type, i);
+				uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
+				for (uint32_t dim = 0; dim < dimensions; dim++)
+				{
+					uint32_t array_size = to_array_size_literal(mbr_type, dim);
+					array_stride /= max(array_size, 1u);
+				}
+
+				// Set expected struct size based on ArrayStride.
+				struct_needs_explicit_padding = true;
+
+				// If struct size is larger than array stride, we might be able to fit, if we tightly pack.
+				if (get_declared_struct_size_msl(*struct_type) > array_stride)
+					struct_is_too_large = true;
+			}
+
+			if (struct_is_misaligned || struct_is_too_large)
+				mark_struct_members_packed(*struct_type);
+			mark_scalar_layout_structs(*struct_type);
+
+			if (struct_needs_explicit_padding)
+			{
+				msl_size = get_declared_struct_size_msl(*struct_type, true, true);
+				if (array_stride < msl_size)
+				{
+					SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type.");
+				}
+				else
+				{
+					if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
+					{
+						if (array_stride !=
+						    get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
+							SPIRV_CROSS_THROW(
+							    "A struct is used with different array strides. Cannot express this in MSL.");
+					}
+					else
+						set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride);
+				}
+			}
+		}
+	}
+}
+
 // Sort the members of the struct type by offset, and pack and then pad members where needed
 // to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing
 // occurs first, followed by padding, because packing a member reduces both its size and its
 // natural alignment, possibly requiring a padding member to be added ahead of it.
-void CompilerMSL::align_struct(SPIRType &ib_type)
+void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set<uint32_t> &aligned_structs)
 {
-	uint32_t &ib_type_id = ib_type.self;
+	// We align structs recursively, so stop any redundant work.
+	ID &ib_type_id = ib_type.self;
+	if (aligned_structs.count(ib_type_id))
+		return;
+	aligned_structs.insert(ib_type_id);
 
 	// Sort the members of the interface structure by their offset.
 	// They should already be sorted per SPIR-V spec anyway.
 	MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset);
 	member_sorter.sort();
 
-	uint32_t mbr_cnt = uint32_t(ib_type.member_types.size());
+	auto mbr_cnt = uint32_t(ib_type.member_types.size());
+
+	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
+	{
+		// Pack any dependent struct types before we pack a parent struct.
+		auto &mbr_type = get<SPIRType>(ib_type.member_types[mbr_idx]);
+		if (mbr_type.basetype == SPIRType::Struct)
+			align_struct(mbr_type, aligned_structs);
+	}
 
 	// Test the alignment of each member, and if a member should be closer to the previous
 	// member than the default spacing expects, it is likely that the previous member is in
 	// a packed format. If so, and the previous member is packable, pack it.
-	// For example...this applies to any 3-element vector that is followed by a scalar.
-	uint32_t curr_offset = 0;
+	// For example ... this applies to any 3-element vector that is followed by a scalar.
+	uint32_t msl_offset = 0;
 	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
 	{
-		if (is_member_packable(ib_type, mbr_idx))
-		{
-			set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked);
-			set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType,
-			                               ib_type.member_types[mbr_idx]);
-		}
+		// Check the member in isolation to see whether it needs some kind of type remapping to conform to SPIR-V
+		// offsets, array strides and matrix strides.
+		ensure_member_packing_rules_msl(ib_type, mbr_idx);
 
-		// Align current offset to the current member's default alignment.
-		size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
-		uint32_t aligned_curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
+		// Align current offset to the current member's default alignment. If the member was packed, it will observe
+		// the updated alignment here.
+		uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1;
+		uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;
 
 		// Fetch the member offset as declared in the SPIRV.
-		uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
-		if (mbr_offset > aligned_curr_offset)
+		uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
+		if (spirv_mbr_offset > aligned_msl_offset)
 		{
 			// Since MSL and SPIR-V have slightly different struct member alignment and
-			// size rules, we'll pad to standard C-packing rules. If the member is farther
+			// size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther
 			// away than C-packing, expects, add an inert padding member before the the member.
-			MSLStructMemberKey key = get_struct_member_key(ib_type_id, mbr_idx);
-			struct_member_padding[key] = mbr_offset - curr_offset;
+			uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset;
+			set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes);
+
+			// Re-align as a sanity check that aligning post-padding matches up.
+			msl_offset += padding_bytes;
+			aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;
+		}
+		else if (spirv_mbr_offset < aligned_msl_offset)
+		{
+			// This should not happen, but deal with unexpected scenarios.
+			// It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V.
+			SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL.");
 		}
 
+		assert(aligned_msl_offset == spirv_mbr_offset);
+
 		// Increment the current offset to be positioned immediately after the current member.
 		// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
 		if (mbr_idx + 1 < mbr_cnt)
-			curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
+			msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx);
 	}
 }
 
-// Returns whether the specified struct member supports a packable type
-// variation that is smaller than the unpacked variation of that type.
-bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
+bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const
 {
-	// We've already marked it as packable
-	if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked))
-		return true;
-
-	auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
-
-	uint32_t component_size = mbr_type.width / 8;
-	uint32_t unpacked_mbr_size;
-	if (mbr_type.vecsize == 3)
-		unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
-	else
-		unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
+	auto &mbr_type = get<SPIRType>(type.member_types[index]);
+	uint32_t spirv_offset = get_member_decoration(type.self, index, DecorationOffset);
+
+	if (index + 1 < type.member_types.size())
+	{
+		// First, we will check offsets. If SPIR-V offset + MSL size > SPIR-V offset of next member,
+		// we *must* perform some kind of remapping; there is no way around it.
+		// If the MSL size is smaller instead, we can always pad after this member, so that case is fine.
+		uint32_t spirv_offset_next = get_member_decoration(type.self, index + 1, DecorationOffset);
+		assert(spirv_offset_next >= spirv_offset);
+		uint32_t maximum_size = spirv_offset_next - spirv_offset;
+		uint32_t msl_mbr_size = get_declared_struct_member_size_msl(type, index);
+		if (msl_mbr_size > maximum_size)
+			return false;
+	}
 
-	// Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead,
-	// but we will use the same mechanism.
-	if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 &&
-	    type_struct_member_array_stride(ib_type, index) == 4 * component_size)
+	if (!mbr_type.array.empty())
 	{
-		return true;
+		// If we have an array type, array stride must match exactly with SPIR-V.
+		uint32_t spirv_array_stride = type_struct_member_array_stride(type, index);
+		uint32_t msl_array_stride = get_declared_struct_member_array_stride_msl(type, index);
+		if (spirv_array_stride != msl_array_stride)
+			return false;
 	}
 
-	// Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself.
-	// This can happen for struct A { float a }; A a[]; in std140 layout.
-	// TODO: Emit a padded struct which can be used for this purpose.
-	if (is_array(mbr_type) && mbr_type.basetype == SPIRType::Struct)
+	if (is_matrix(mbr_type))
 	{
-		size_t declared_struct_size = get_declared_struct_size(mbr_type);
-		size_t alignment = get_declared_struct_member_alignment(ib_type, index);
-		declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1);
-		if (type_struct_member_array_stride(ib_type, index) > declared_struct_size)
-			return true;
+		// Need to check MatrixStride as well.
+		uint32_t spirv_matrix_stride = type_struct_member_matrix_stride(type, index);
+		uint32_t msl_matrix_stride = get_declared_struct_member_matrix_stride_msl(type, index);
+		if (spirv_matrix_stride != msl_matrix_stride)
+			return false;
 	}
 
-	// TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column.
-	//if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16)
-	//	SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout.");
-
-	// Only vectors or 3-row matrices need to be packed.
-	if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
+	// Now, we check alignment.
+	uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, index);
+	if ((spirv_offset % msl_alignment) != 0)
 		return false;
 
-	// Only row-major matrices need to be packed.
-	if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
-		return false;
+	// We're in the clear.
+	return true;
+}
 
-	if (is_array(mbr_type))
-	{
-		// If member is an array, and the array stride is larger than the type needs, don't pack it.
-		// Take into consideration multi-dimentional arrays.
-		uint32_t md_elem_cnt = 1;
-		size_t last_elem_idx = mbr_type.array.size() - 1;
-		for (uint32_t i = 0; i < last_elem_idx; i++)
-			md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1u);
+// Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions.
+// If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types.
+// In odd cases we need to emit packed and remapped types, for e.g. weird matrices or arrays with weird array strides.
+void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index)
+{
+	if (validate_member_packing_rules_msl(ib_type, index))
+		return;
+
+	// We failed validation.
+	// This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite
+	// match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule
+	// that struct alignment == max alignment of all members and struct size depends on this alignment.
+	auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
+	if (mbr_type.basetype == SPIRType::Struct)
+		SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct.");
 
-		uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt;
+	// Perform remapping here.
+	set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
+
+	// Try validating again, now with packed.
+	if (validate_member_packing_rules_msl(ib_type, index))
+		return;
+
+	// We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect.
+	// A lot of work goes here ...
+	// We will need remapping on Load and Store to translate the types between Logical and Physical.
+
+	// First, we check if we have a std140-style array of scalars or small vectors.
+	// Any non-matrix array member lands here; its element is resized to span the per-element array stride.
+	if (!mbr_type.array.empty() && !is_matrix(mbr_type))
+	{
 		uint32_t array_stride = type_struct_member_array_stride(ib_type, index);
-		return unpacked_array_stride > array_stride;
+
+		// Hack off array-of-arrays until we find the array stride per element we must have to make it work.
+		uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
+		for (uint32_t dim = 0; dim < dimensions; dim++)
+			array_stride /= max(to_array_size_literal(mbr_type, dim), 1u);
+
+		uint32_t elems_per_stride = array_stride / (mbr_type.width / 8);
+
+		if (elems_per_stride == 3)
+			SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
+		else if (elems_per_stride > 4)
+			SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");
+
+		auto physical_type = mbr_type;
+		physical_type.vecsize = elems_per_stride;
+		physical_type.parent_type = 0;
+		uint32_t type_id = ir.increase_bound_by(1);
+		set<SPIRType>(type_id, physical_type);
+		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
+		set_decoration(type_id, DecorationArrayStride, array_stride);
+
+		// Remove packed_ for vectors of size 1, 2 and 4.
+		if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked))
+			SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix "
+			                  "scalar and std140 layout rules.");
+		else
+			unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
 	}
-	else
+	else if (is_matrix(mbr_type))
 	{
-		uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
-		// For vectors, pack if the member's offset doesn't conform to the
-		// type's usual alignment. For example, a float3 at offset 4.
-		if (!is_matrix(mbr_type) && (mbr_offset_curr % unpacked_mbr_size))
-			return true;
-		// Pack if there is not enough space between this member and next.
-		// If last member, only pack if it's a row-major matrix.
-		if (index < ib_type.member_types.size() - 1)
-		{
-			uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
-			return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
-		}
+		// MatrixStride might be std140-esque.
+		uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index);
+
+		uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8);
+
+		if (elems_per_stride == 3)
+			SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
+		else if (elems_per_stride > 4)
+			SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");
+
+		bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);
+
+		auto physical_type = mbr_type;
+		physical_type.parent_type = 0;
+		if (row_major)
+			physical_type.columns = elems_per_stride;
+		else
+			physical_type.vecsize = elems_per_stride;
+		uint32_t type_id = ir.increase_bound_by(1);
+		set<SPIRType>(type_id, physical_type);
+		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
+
+		// Remove packed_ for vectors of size 1, 2 and 4.
+		if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked))
+			SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix "
+			                  "scalar and std140 layout rules.");
 		else
-			return is_matrix(mbr_type);
+			unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
 	}
-}
 
-// Returns a combination of type ID and member index for use as hash key
-MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t index)
-{
-	MSLStructMemberKey k = type_id;
-	k <<= 32;
-	k += index;
-	return k;
+	// This better validate now, or we must fail gracefully.
+	if (!validate_member_packing_rules_msl(ib_type, index))
+		SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");
 }
 
 void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
 {
-	if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) ||
-	    get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0)
+	auto &type = expression_type(rhs_expression);
+
+	bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID);
+	bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked);
+	auto *lhs_e = maybe_get<SPIRExpression>(lhs_expression);
+	auto *rhs_e = maybe_get<SPIRExpression>(rhs_expression);
+
+	bool transpose = lhs_e && lhs_e->need_transpose;
+
+	// No physical type remapping, and no packed type, so can just emit a store directly.
+	if (!lhs_remapped_type && !lhs_packed_type)
+	{
+		// We might not be dealing with remapped physical types or packed types,
+		// but we might be doing a clean store to a row-major matrix.
+		// In this case, we just flip transpose states and emit the store; a transpose, if any, must be in the RHS expression.
+		if (is_matrix(type) && lhs_e && lhs_e->need_transpose)
+		{
+			if (!rhs_e)
+				SPIRV_CROSS_THROW("Need to transpose right-side expression of a store to row-major matrix, but it is "
+				                  "not a SPIRExpression.");
+			lhs_e->need_transpose = false;
+
+			if (rhs_e && rhs_e->need_transpose)
+			{
+				// Direct copy, but might need to unpack RHS.
+				// Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T.
+				rhs_e->need_transpose = false;
+				statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression),
+				          ";");
+				rhs_e->need_transpose = true;
+			}
+			else
+				statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");");
+
+			lhs_e->need_transpose = true;
+			register_write(lhs_expression);
+		}
+		else if (lhs_e && lhs_e->need_transpose)
+		{
+			lhs_e->need_transpose = false;
+
+			// Storing a column to a row-major matrix. Unroll the write.
+			for (uint32_t c = 0; c < type.vecsize; c++)
+			{
+				auto lhs_expr = to_dereferenced_expression(lhs_expression);
+				auto column_index = lhs_expr.find_last_of('[');
+				if (column_index != string::npos)
+				{
+					statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ",
+					          to_extract_component_expression(rhs_expression, c), ";");
+				}
+			}
+			lhs_e->need_transpose = true;
+			register_write(lhs_expression);
+		}
+		else
+			CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
+	}
+	else if (!lhs_remapped_type && !is_matrix(type) && !transpose)
 	{
+		// Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly,
+		// since they are declared as array of vectors instead, and we need the fallback path below.
 		CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
 	}
 	else
 	{
-		// Special handling when storing to a float[] or float2[] in std140 layout.
+		// Special handling when storing to a remapped physical type.
+		// This is mostly to deal with std140 padded matrices or vectors.
+
+		TypeID physical_type_id = lhs_remapped_type ?
+		                              ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) :
+		                              type.self;
+
+		auto &physical_type = get<SPIRType>(physical_type_id);
+
+		static const char *swizzle_lut[] = {
+			".x",
+			".xy",
+			".xyz",
+			"",
+		};
+
+		if (is_matrix(type))
+		{
+			// Packed matrices are stored as arrays of packed vectors, so we need
+			// to assign the vectors one at a time.
+			// For row-major matrices, we need to transpose the *right-hand* side,
+			// not the left-hand side.
 
-		auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType));
-		string lhs = to_dereferenced_expression(lhs_expression);
-		string rhs = to_pointer_expression(rhs_expression);
+			// Lots of cases to cover here ...
 
-		// Unpack the expression so we can store to it with a float or float2.
-		// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
-		if (is_scalar(type) && is_array(type))
-			lhs = enclose_expression(lhs) + ".x";
-		else if (is_vector(type) && type.vecsize == 2 && is_array(type))
-			lhs = enclose_expression(lhs) + ".xy";
+			bool rhs_transpose = rhs_e && rhs_e->need_transpose;
+
+			// We're dealing with transpose manually.
+			if (rhs_transpose)
+				rhs_e->need_transpose = false;
+
+			if (transpose)
+			{
+				// We're dealing with transpose manually.
+				lhs_e->need_transpose = false;
+
+				const char *store_swiz = "";
+				if (physical_type.columns != type.columns)
+					store_swiz = swizzle_lut[type.columns - 1];
+
+				if (rhs_transpose)
+				{
+					// If RHS is also transposed, we can just copy row by row.
+					for (uint32_t i = 0; i < type.vecsize; i++)
+					{
+						statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ",
+						          to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];");
+					}
+				}
+				else
+				{
+					auto vector_type = expression_type(rhs_expression);
+					vector_type.vecsize = vector_type.columns;
+					vector_type.columns = 1;
+
+					// Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
+					// so pick out individual components instead.
+					for (uint32_t i = 0; i < type.vecsize; i++)
+					{
+						string rhs_row = type_to_glsl_constructor(vector_type) + "(";
+						for (uint32_t j = 0; j < vector_type.vecsize; j++)
+						{
+							rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]");
+							if (j + 1 < vector_type.vecsize)
+								rhs_row += ", ";
+						}
+						rhs_row += ")";
+
+						statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", rhs_row, ";");
+					}
+				}
+
+				// We're dealing with transpose manually.
+				lhs_e->need_transpose = true;
+			}
+			else
+			{
+				const char *store_swiz = "";
+				if (physical_type.vecsize != type.vecsize)
+					store_swiz = swizzle_lut[type.vecsize - 1];
+
+				if (rhs_transpose)
+				{
+					auto vector_type = expression_type(rhs_expression);
+					vector_type.columns = 1;
+
+					// Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
+					// so pick out individual components instead.
+					for (uint32_t i = 0; i < type.columns; i++)
+					{
+						string rhs_row = type_to_glsl_constructor(vector_type) + "(";
+						for (uint32_t j = 0; j < vector_type.vecsize; j++)
+						{
+							// Need to explicitly unpack expression since we've mucked with transpose state.
+							auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression);
+							rhs_row += join(unpacked_expr, "[", j, "][", i, "]");
+							if (j + 1 < vector_type.vecsize)
+								rhs_row += ", ";
+						}
+						rhs_row += ")";
+
+						statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", rhs_row, ";");
+					}
+				}
+				else
+				{
+					// Copy column-by-column.
+					for (uint32_t i = 0; i < type.columns; i++)
+					{
+						statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ",
+						          to_enclosed_unpacked_expression(rhs_expression), "[", i, "];");
+					}
+				}
+			}
+
+			// We're dealing with transpose manually.
+			if (rhs_transpose)
+				rhs_e->need_transpose = true;
+		}
+		else if (transpose)
+		{
+			lhs_e->need_transpose = false;
+
+			// Storing a column to a row-major matrix. Unroll the write.
+			for (uint32_t c = 0; c < type.vecsize; c++)
+			{
+				auto lhs_expr = to_enclosed_expression(lhs_expression);
+				auto column_index = lhs_expr.find_last_of('[');
+				if (column_index != string::npos)
+				{
+					statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ",
+					          to_extract_component_expression(rhs_expression, c), ";");
+				}
+			}
+
+			lhs_e->need_transpose = true;
+		}
+		else if ((is_matrix(physical_type) || is_array(physical_type)) && physical_type.vecsize > type.vecsize)
+		{
+			assert(type.vecsize >= 1 && type.vecsize <= 3);
+
+			// If we have packed types, we cannot use swizzled stores.
+			// We could technically unroll the store for each element if needed.
+			// When remapping to a std140 physical type, we always get float4,
+			// and the packed decoration should always be removed.
+			assert(!lhs_packed_type);
+
+			string lhs = to_dereferenced_expression(lhs_expression);
+			string rhs = to_pointer_expression(rhs_expression);
+
+			// Unpack the expression so we can store to it with a float or float2.
+			// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
+			lhs = enclose_expression(lhs) + swizzle_lut[type.vecsize - 1];
+			if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
+				statement(lhs, " = ", rhs, ";");
+		}
+		else if (!is_matrix(type))
+		{
+			string lhs = to_dereferenced_expression(lhs_expression);
+			string rhs = to_pointer_expression(rhs_expression);
+			if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
+				statement(lhs, " = ", rhs, ";");
+		}
 
-		if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
-			statement(lhs, " = ", rhs, ";");
 		register_write(lhs_expression);
 	}
 }
 
 // Converts the format of the current expression from packed to unpacked,
 // by wrapping the expression in a constructor of the appropriate type.
-string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id)
+// Also, handle special physical ID remapping scenarios, similar to emit_store_statement().
+string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id,
+                                           bool packed, bool row_major)
 {
-	const SPIRType *packed_type = nullptr;
-	if (packed_type_id)
-		packed_type = &get<SPIRType>(packed_type_id);
+	// Trivial case, nothing to do.
+	if (physical_type_id == 0 && !packed)
+		return expr_str;
+
+	const SPIRType *physical_type = nullptr;
+	if (physical_type_id)
+		physical_type = &get<SPIRType>(physical_type_id);
+
+	static const char *swizzle_lut[] = {
+		".x",
+		".xy",
+		".xyz",
+	};
+
+	// std140 array cases for vectors.
+	if (physical_type && is_vector(*physical_type) && is_array(*physical_type) && physical_type->vecsize > type.vecsize)
+	{
+		assert(type.vecsize >= 1 && type.vecsize <= 3);
+		return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
+	}
+	else if (is_matrix(type))
+	{
+		// Packed matrices are stored as arrays of packed vectors. Unfortunately,
+		// we can't just pass the array straight to the matrix constructor. We have to
+		// pass each vector individually, so that they can be unpacked to normal vectors.
+		if (!physical_type)
+			physical_type = &type;
+
+		uint32_t vecsize = type.vecsize;
+		uint32_t columns = type.columns;
+		if (row_major)
+			swap(vecsize, columns);
+
+		uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize;
+
+		const char *base_type = type.width == 16 ? "half" : "float";
+		string unpack_expr = join(base_type, columns, "x", vecsize, "(");
+
+		const char *load_swiz = "";
 
-	// float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead.
-	if (packed_type && is_array(*packed_type) && is_scalar(*packed_type))
-		return enclose_expression(expr_str) + ".x";
-	else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2)
-		return enclose_expression(expr_str) + ".xy";
+		if (physical_vecsize != vecsize)
+			load_swiz = swizzle_lut[vecsize - 1];
+
+		for (uint32_t i = 0; i < columns; i++)
+		{
+			if (i > 0)
+				unpack_expr += ", ";
+
+			if (packed)
+				unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz);
+			else
+				unpack_expr += join(expr_str, "[", i, "]", load_swiz);
+		}
+
+		unpack_expr += ")";
+		return unpack_expr;
+	}
 	else
 		return join(type_to_glsl(type), "(", expr_str, ")");
 }
@@ -2399,6 +3225,39 @@ void CompilerMSL::emit_custom_functions()
 		if (spv_function_implementations.count(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i)))
 			spv_function_implementations.insert(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i - 1));
 
+	if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler))
+	{
+		// The dynamic image sampler needs MSL 2.0 and only compiles when the helpers registered below are emitted too.
+		if (!msl_options.supports_msl_version(2))
+			SPIRV_CROSS_THROW(
+			    "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0.");
+		spv_function_implementations.insert(SPVFuncImplForwardArgs);
+		spv_function_implementations.insert(SPVFuncImplTextureSwizzle);
+		if (msl_options.swizzle_texture_samples)
+			spv_function_implementations.insert(SPVFuncImplGatherSwizzle);
+		for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
+		     i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
+			spv_function_implementations.insert(static_cast<SPVFuncImpl>(i));
+		spv_function_implementations.insert(SPVFuncImplExpandITUFullRange);
+		spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange);
+		spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709);
+		spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601);
+		spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020);
+	}
+
+	for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
+	     i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
+		if (spv_function_implementations.count(static_cast<SPVFuncImpl>(i)))
+			spv_function_implementations.insert(SPVFuncImplForwardArgs);
+
+	if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) ||
+	    spv_function_implementations.count(SPVFuncImplGatherSwizzle) ||
+	    spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle))
+	{
+		spv_function_implementations.insert(SPVFuncImplForwardArgs);
+		spv_function_implementations.insert(SPVFuncImplGetSwizzle);
+	}
+
 	for (auto &spv_func : spv_function_implementations)
 	{
 		switch (spv_func)
@@ -2406,7 +3265,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplMod:
 			statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()");
 			statement("template<typename Tx, typename Ty>");
-			statement("Tx mod(Tx x, Ty y)");
+			statement("inline Tx mod(Tx x, Ty y)");
 			begin_scope();
 			statement("return x - y * floor(x / y);");
 			end_scope();
@@ -2416,7 +3275,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplRadians:
 			statement("// Implementation of the GLSL radians() function");
 			statement("template<typename T>");
-			statement("T radians(T d)");
+			statement("inline T radians(T d)");
 			begin_scope();
 			statement("return d * T(0.01745329251);");
 			end_scope();
@@ -2426,7 +3285,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplDegrees:
 			statement("// Implementation of the GLSL degrees() function");
 			statement("template<typename T>");
-			statement("T degrees(T r)");
+			statement("inline T degrees(T r)");
 			begin_scope();
 			statement("return r * T(57.2957795131);");
 			end_scope();
@@ -2436,7 +3295,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplFindILsb:
 			statement("// Implementation of the GLSL findLSB() function");
 			statement("template<typename T>");
-			statement("T findLSB(T x)");
+			statement("inline T spvFindLSB(T x)");
 			begin_scope();
 			statement("return select(ctz(x), T(-1), x == T(0));");
 			end_scope();
@@ -2446,7 +3305,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplFindUMsb:
 			statement("// Implementation of the unsigned GLSL findMSB() function");
 			statement("template<typename T>");
-			statement("T findUMSB(T x)");
+			statement("inline T spvFindUMSB(T x)");
 			begin_scope();
 			statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));");
 			end_scope();
@@ -2456,7 +3315,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplFindSMsb:
 			statement("// Implementation of the signed GLSL findMSB() function");
 			statement("template<typename T>");
-			statement("T findSMSB(T x)");
+			statement("inline T spvFindSMSB(T x)");
 			begin_scope();
 			statement("T v = select(x, T(-1) - x, x < T(0));");
 			statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));");
@@ -2467,7 +3326,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplSSign:
 			statement("// Implementation of the GLSL sign() function for integer types");
 			statement("template<typename T, typename E = typename enable_if<is_integral<T>::value>::type>");
-			statement("T sign(T x)");
+			statement("inline T sign(T x)");
 			begin_scope();
 			statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));");
 			end_scope();
@@ -2475,40 +3334,27 @@ void CompilerMSL::emit_custom_functions()
 			break;
 
 		case SPVFuncImplArrayCopy:
-			statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via "
-			          "assignment.");
-			statement("template<typename T, uint N>");
-			statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])");
-			begin_scope();
-			statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
-			end_scope();
-			statement("");
-
-			statement("template<typename T, uint N>");
-			statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])");
-			begin_scope();
-			statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
-			end_scope();
-			statement("");
-			break;
-
 		case SPVFuncImplArrayOfArrayCopy2Dim:
 		case SPVFuncImplArrayOfArrayCopy3Dim:
 		case SPVFuncImplArrayOfArrayCopy4Dim:
 		case SPVFuncImplArrayOfArrayCopy5Dim:
 		case SPVFuncImplArrayOfArrayCopy6Dim:
 		{
+			// The address space cannot be a template parameter, so one overload is emitted per source/destination pairing; these tags name the six pairings.
 			static const char *function_name_tags[] = {
-				"FromStack",
-				"FromConstant",
+				"FromConstantToStack",    "FromConstantToThreadGroup", "FromStackToStack",
+				"FromStackToThreadGroup", "FromThreadGroupToStack",    "FromThreadGroupToThreadGroup",
 			};
 
 			static const char *src_address_space[] = {
-				"thread const",
-				"constant",
+				"constant", "constant", "thread const", "thread const", "threadgroup const", "threadgroup const",
 			};
 
-			for (uint32_t variant = 0; variant < 2; variant++)
+			static const char *dst_address_space[] = {
+				"thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup",
+			};
+
+			for (uint32_t variant = 0; variant < 6; variant++)
 			{
 				uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase;
 				string tmp = "template<typename T";
@@ -2528,17 +3374,23 @@ void CompilerMSL::emit_custom_functions()
 					array_arg += "]";
 				}
 
-				statement("void spvArrayCopy", function_name_tags[variant], dimensions, "(thread T (&dst)", array_arg,
-				          ", ", src_address_space[variant], " T (&src)", array_arg, ")");
+				statement("inline void spvArrayCopy", function_name_tags[variant], dimensions, "(",
+				          dst_address_space[variant], " T (&dst)", array_arg, ", ", src_address_space[variant],
+				          " T (&src)", array_arg, ")");
 
 				begin_scope();
 				statement("for (uint i = 0; i < A; i++)");
 				begin_scope();
-				statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);");
+
+				if (dimensions == 1)
+					statement("dst[i] = src[i];");
+				else
+					statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);");
 				end_scope();
 				end_scope();
 				statement("");
 			}
+
 			break;
 		}
 
@@ -2546,7 +3398,7 @@ void CompilerMSL::emit_custom_functions()
 		{
 			string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width);
 			statement("// Returns 2D texture coords corresponding to 1D texel buffer coords");
-			statement("uint2 spvTexelBufferCoord(uint tc)");
+			statement("inline uint2 spvTexelBufferCoord(uint tc)");
 			begin_scope();
 			statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");"));
 			end_scope();
@@ -2572,7 +3424,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("");
 			statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
 			statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
-			statement("float4x4 spvInverse4x4(float4x4 m)");
+			statement("inline float4x4 spvInverse4x4(float4x4 m)");
 			begin_scope();
 			statement("float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)");
 			statement_no_indent("");
@@ -2637,7 +3489,7 @@ void CompilerMSL::emit_custom_functions()
 
 			statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
 			statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
-			statement("float3x3 spvInverse3x3(float3x3 m)");
+			statement("inline float3x3 spvInverse3x3(float3x3 m)");
 			begin_scope();
 			statement("float3x3 adj;	// The adjoint matrix (inverse after dividing by determinant)");
 			statement_no_indent("");
@@ -2667,7 +3519,7 @@ void CompilerMSL::emit_custom_functions()
 		case SPVFuncImplInverse2x2:
 			statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
 			statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
-			statement("float2x2 spvInverse2x2(float2x2 m)");
+			statement("inline float2x2 spvInverse2x2(float2x2 m)");
 			begin_scope();
 			statement("float2x2 adj;	// The adjoint matrix (inverse after dividing by determinant)");
 			statement_no_indent("");
@@ -2688,65 +3540,24 @@ void CompilerMSL::emit_custom_functions()
 			statement("");
 			break;
 
-		case SPVFuncImplRowMajor2x3:
-			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
-			statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)");
-			begin_scope();
-			statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));");
-			end_scope();
-			statement("");
-			break;
-
-		case SPVFuncImplRowMajor2x4:
-			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
-			statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)");
-			begin_scope();
-			statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], "
-			          "m[1][3]));");
-			end_scope();
-			statement("");
-			break;
-
-		case SPVFuncImplRowMajor3x2:
-			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
-			statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)");
-			begin_scope();
-			statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));");
-			end_scope();
-			statement("");
-			break;
-
-		case SPVFuncImplRowMajor3x4:
-			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
-			statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)");
-			begin_scope();
-			statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], "
-			          "m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));");
-			end_scope();
-			statement("");
-			break;
-
-		case SPVFuncImplRowMajor4x2:
-			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
-			statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)");
+		case SPVFuncImplForwardArgs:
+			statement("template<typename T> struct spvRemoveReference { typedef T type; };");
+			statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };");
+			statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };");
+			statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
+			          "spvRemoveReference<T>::type& x)");
 			begin_scope();
-			statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), "
-			          "float2(m[1][1], m[3][1]));");
+			statement("return static_cast<thread T&&>(x);");
 			end_scope();
-			statement("");
-			break;
-
-		case SPVFuncImplRowMajor4x3:
-			statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
-			statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)");
+			statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
+			          "spvRemoveReference<T>::type&& x)");
 			begin_scope();
-			statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), "
-			          "float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));");
+			statement("return static_cast<thread T&&>(x);");
 			end_scope();
 			statement("");
 			break;
 
-		case SPVFuncImplTextureSwizzle:
+		case SPVFuncImplGetSwizzle:
 			statement("enum class spvSwizzle : uint");
 			begin_scope();
 			statement("none = 0,");
@@ -2758,20 +3569,6 @@ void CompilerMSL::emit_custom_functions()
 			statement("alpha");
 			end_scope_decl();
 			statement("");
-			statement("template<typename T> struct spvRemoveReference { typedef T type; };");
-			statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };");
-			statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };");
-			statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
-			          "spvRemoveReference<T>::type& x)");
-			begin_scope();
-			statement("return static_cast<thread T&&>(x);");
-			end_scope();
-			statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
-			          "spvRemoveReference<T>::type&& x)");
-			begin_scope();
-			statement("return static_cast<thread T&&>(x);");
-			end_scope();
-			statement("");
 			statement("template<typename T>");
 			statement("inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)");
 			begin_scope();
@@ -2794,6 +3591,9 @@ void CompilerMSL::emit_custom_functions()
 			end_scope();
 			end_scope();
 			statement("");
+			break;
+
+		case SPVFuncImplTextureSwizzle:
 			statement("// Wrapper function that swizzles texture samples and fetches.");
 			statement("template<typename T>");
 			statement("inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)");
@@ -2812,11 +3612,14 @@ void CompilerMSL::emit_custom_functions()
 			statement("return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;");
 			end_scope();
 			statement("");
+			break;
+
+		case SPVFuncImplGatherSwizzle:
 			statement("// Wrapper function that swizzles texture gathers.");
-			statement("template<typename T, typename Tex, typename... Ts>");
-			statement(
-			    "inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) "
-			    "METAL_CONST_ARG(c)");
+			statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
+			          "typename... Ts>");
+			statement("inline vec<T, 4> spvGatherSwizzle(const thread Tex<T>& t, sampler s, "
+			          "uint sw, component c, Ts... params) METAL_CONST_ARG(c)");
 			begin_scope();
 			statement("if (sw)");
 			begin_scope();
@@ -2853,10 +3656,14 @@ void CompilerMSL::emit_custom_functions()
 			end_scope();
 			end_scope();
 			statement("");
+			break;
+
+		case SPVFuncImplGatherCompareSwizzle:
 			statement("// Wrapper function that swizzles depth texture gathers.");
-			statement("template<typename T, typename Tex, typename... Ts>");
-			statement(
-			    "inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) ");
+			statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
+			          "typename... Ts>");
+			statement("inline vec<T, 4> spvGatherCompareSwizzle(const thread Tex<T>& t, sampler "
+			          "s, uint sw, Ts... params) ");
 			begin_scope();
 			statement("if (sw)");
 			begin_scope();
@@ -2877,6 +3684,828 @@ void CompilerMSL::emit_custom_functions()
 			statement("return t.gather_compare(s, spvForward<Ts>(params)...);");
 			end_scope();
 			statement("");
+			break;
+
+		case SPVFuncImplSubgroupBallot:
+			statement("inline uint4 spvSubgroupBallot(bool value)");
+			begin_scope();
+			statement("simd_vote vote = simd_ballot(value);");
+			statement("// simd_ballot() returns a 64-bit integer-like object, but");
+			statement("// SPIR-V callers expect a uint4. We must convert.");
+			statement("// FIXME: This won't include higher bits if Apple ever supports");
+			statement("// 128 lanes in an SIMD-group.");
+			statement("return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> "
+			          "32) & 0xFFFFFFFF), 0, 0);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupBallotBitExtract:
+			statement("inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)");
+			begin_scope();
+			statement("return !!extract_bits(ballot[bit / 32], bit % 32, 1);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupBallotFindLSB:
+			statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)");
+			begin_scope();
+			statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
+			          "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupBallotFindMSB:
+			statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)");
+			begin_scope();
+			statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
+			          "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
+			          "ballot.z == 0), ballot.w == 0);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupBallotBitCount:
+			statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)");
+			begin_scope();
+			statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);");
+			end_scope();
+			statement("");
+			statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
+			begin_scope();
+			statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
+			          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
+			          "uint2(0));");
+			statement("return spvSubgroupBallotBitCount(ballot & mask);");
+			end_scope();
+			statement("");
+			statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
+			begin_scope();
+			statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
+			          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
+			statement("return spvSubgroupBallotBitCount(ballot & mask);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplSubgroupAllEqual:
+			// Metal doesn't provide a function to evaluate this directly. But, we can
+			// implement this by comparing every thread's value to one thread's value
+			// (in this case, the value of the first active thread). Then, by the transitive
+			// property of equality, if all comparisons return true, then they are all equal.
+			statement("template<typename T>");
+			statement("inline bool spvSubgroupAllEqual(T value)");
+			begin_scope();
+			statement("return simd_all(value == simd_broadcast_first(value));");
+			end_scope();
+			statement("");
+			statement("template<>");
+			statement("inline bool spvSubgroupAllEqual(bool value)");
+			begin_scope();
+			statement("return simd_all(value) || !simd_any(value);");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplReflectScalar:
+			// Metal does not support a scalar version of this function, so emit a templated one computing i - 2 * i * n * n.
+			statement("template<typename T>");
+			statement("inline T spvReflect(T i, T n)");
+			begin_scope();
+			statement("return i - T(2) * i * n * n;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRefractScalar:
+			// Metal does not support a scalar version of this function, so emit a templated one; it yields T(0) whenever k is negative.
+			statement("template<typename T>");
+			statement("inline T spvRefract(T i, T n, T eta)");
+			begin_scope();
+			statement("T NoI = n * i;");
+			statement("T NoI2 = NoI * NoI;");
+			statement("T k = T(1) - eta * eta * (T(1) - NoI2);");
+			statement("if (k < T(0))");
+			begin_scope();
+			statement("return T(0);");
+			end_scope();
+			statement("else");
+			begin_scope();
+			statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+			end_scope();
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplFaceForwardScalar:
+			// Metal does not support a scalar version of this function, so emit a templated one: n when i * nref is negative, -n otherwise.
+			statement("template<typename T>");
+			statement("inline T spvFaceForward(T n, T i, T nref)");
+			begin_scope();
+			statement("return i * nref < T(0) ? n : -n;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructNearest2Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, sampler "
+			          "samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructNearest3Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, "
+			          "texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear422CositedEven2Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
+			          "plane1, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
+			begin_scope();
+			statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).rg);");
+			end_scope();
+			statement("else");
+			begin_scope();
+			statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
+			end_scope();
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear422CositedEven3Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
+			          "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
+			begin_scope();
+			statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
+			statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
+			end_scope();
+			statement("else");
+			begin_scope();
+			statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			end_scope();
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear422Midpoint2Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
+			          "plane1, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
+			statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).rg);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear422Midpoint3Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
+			          "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
+			statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
+			statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
+			          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
+			statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
+			          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
+			statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
+			          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+			          "0)) * 0.5);");
+			statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
+			          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+			          "0)) * 0.5);");
+			statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
+			          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
+			          "0.5)) * 0.5);");
+			statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
+			          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
+			          "0.5)) * 0.5);");
+			statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
+			          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+			          "0.5)) * 0.5);");
+			statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane:
+			statement("template<typename T, typename... LodOptions>");
+			statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
+			          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+			begin_scope();
+			statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+			statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+			statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+			          "0.5)) * 0.5);");
+			statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+			          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+			          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplExpandITUFullRange:
+			statement("template<typename T>");
+			statement("inline vec<T, 4> spvExpandITUFullRange(vec<T, 4> ycbcr, int n)");
+			begin_scope();
+			statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplExpandITUNarrowRange:
+			statement("template<typename T>");
+			statement("inline vec<T, 4> spvExpandITUNarrowRange(vec<T, 4> ycbcr, int n)");
+			begin_scope();
+			statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);");
+			statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);");
+			statement("return ycbcr;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplConvertYCbCrBT709:
+			statement("// cf. Khronos Data Format Specification, section 15.1.1");
+			statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, "
+			          "-0.33480248/0.7152, 0}};");
+			statement("");
+			statement("template<typename T>");
+			statement("inline vec<T, 4> spvConvertYCbCrBT709(vec<T, 4> ycbcr)");
+			begin_scope();
+			statement("vec<T, 4> rgba;");
+			statement("rgba.rgb = vec<T, 3>(spvBT709Factors * ycbcr.gbr);");
+			statement("rgba.a = ycbcr.a;");
+			statement("return rgba;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplConvertYCbCrBT601:
+			statement("// cf. Khronos Data Format Specification, section 15.1.2");
+			statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, "
+			          "-0.419198/0.587, 0}};");
+			statement("");
+			statement("template<typename T>");
+			statement("inline vec<T, 4> spvConvertYCbCrBT601(vec<T, 4> ycbcr)");
+			begin_scope();
+			statement("vec<T, 4> rgba;");
+			statement("rgba.rgb = vec<T, 3>(spvBT601Factors * ycbcr.gbr);");
+			statement("rgba.a = ycbcr.a;");
+			statement("return rgba;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplConvertYCbCrBT2020:
+			statement("// cf. Khronos Data Format Specification, section 15.1.3");
+			statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, "
+			          "-0.38737742/0.6780, 0}};");
+			statement("");
+			statement("template<typename T>");
+			statement("inline vec<T, 4> spvConvertYCbCrBT2020(vec<T, 4> ycbcr)");
+			begin_scope();
+			statement("vec<T, 4> rgba;");
+			statement("rgba.rgb = vec<T, 3>(spvBT2020Factors * ycbcr.gbr);");
+			statement("rgba.a = ycbcr.a;");
+			statement("return rgba;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplDynamicImageSampler:
+			statement("enum class spvFormatResolution");
+			begin_scope();
+			statement("_444 = 0,");
+			statement("_422,");
+			statement("_420");
+			end_scope_decl();
+			statement("");
+			statement("enum class spvChromaFilter");
+			begin_scope();
+			statement("nearest = 0,");
+			statement("linear");
+			end_scope_decl();
+			statement("");
+			statement("enum class spvXChromaLocation");
+			begin_scope();
+			statement("cosited_even = 0,");
+			statement("midpoint");
+			end_scope_decl();
+			statement("");
+			statement("enum class spvYChromaLocation");
+			begin_scope();
+			statement("cosited_even = 0,");
+			statement("midpoint");
+			end_scope_decl();
+			statement("");
+			statement("enum class spvYCbCrModelConversion");
+			begin_scope();
+			statement("rgb_identity = 0,");
+			statement("ycbcr_identity,");
+			statement("ycbcr_bt_709,");
+			statement("ycbcr_bt_601,");
+			statement("ycbcr_bt_2020");
+			end_scope_decl();
+			statement("");
+			statement("enum class spvYCbCrRange");
+			begin_scope();
+			statement("itu_full = 0,");
+			statement("itu_narrow");
+			end_scope_decl();
+			statement("");
+			statement("struct spvComponentBits");
+			begin_scope();
+			statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}");
+			statement("uchar value : 6;");
+			end_scope_decl();
+			statement("// A class corresponding to metal::sampler which holds sampler");
+			statement("// Y'CbCr conversion info.");
+			statement("struct spvYCbCrSampler");
+			begin_scope();
+			statement("constexpr spvYCbCrSampler() thread : val(build()) {}");
+			statement("template<typename... Ts>");
+			statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}");
+			statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;");
+			statement("");
+			statement("spvFormatResolution get_resolution() const thread");
+			begin_scope();
+			statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);");
+			end_scope();
+			statement("spvChromaFilter get_chroma_filter() const thread");
+			begin_scope();
+			statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);");
+			end_scope();
+			statement("spvXChromaLocation get_x_chroma_offset() const thread");
+			begin_scope();
+			statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);");
+			end_scope();
+			statement("spvYChromaLocation get_y_chroma_offset() const thread");
+			begin_scope();
+			statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);");
+			end_scope();
+			statement("spvYCbCrModelConversion get_ycbcr_model() const thread");
+			begin_scope();
+			statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);");
+			end_scope();
+			statement("spvYCbCrRange get_ycbcr_range() const thread");
+			begin_scope();
+			statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);");
+			end_scope();
+			statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }");
+			statement("");
+			statement("private:");
+			statement("ushort val;");
+			statement("");
+			statement("constexpr static constant ushort resolution_bits = 2;");
+			statement("constexpr static constant ushort chroma_filter_bits = 2;");
+			statement("constexpr static constant ushort x_chroma_off_bit = 1;");
+			statement("constexpr static constant ushort y_chroma_off_bit = 1;");
+			statement("constexpr static constant ushort ycbcr_model_bits = 3;");
+			statement("constexpr static constant ushort ycbcr_range_bit = 1;");
+			statement("constexpr static constant ushort bpc_bits = 6;");
+			statement("");
+			statement("constexpr static constant ushort resolution_base = 0;");
+			statement("constexpr static constant ushort chroma_filter_base = 2;");
+			statement("constexpr static constant ushort x_chroma_off_base = 4;");
+			statement("constexpr static constant ushort y_chroma_off_base = 5;");
+			statement("constexpr static constant ushort ycbcr_model_base = 6;");
+			statement("constexpr static constant ushort ycbcr_range_base = 9;");
+			statement("constexpr static constant ushort bpc_base = 10;");
+			statement("");
+			statement(
+			    "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;");
+			statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << "
+			          "chroma_filter_base;");
+			statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << "
+			          "x_chroma_off_base;");
+			statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << "
+			          "y_chroma_off_base;");
+			statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << "
+			          "ycbcr_model_base;");
+			statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << "
+			          "ycbcr_range_base;");
+			statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;");
+			statement("");
+			statement("static constexpr ushort build()");
+			begin_scope();
+			statement("return 0;");
+			end_scope();
+			statement("");
+			statement("template<typename... Ts>");
+			statement("static constexpr ushort build(spvFormatResolution res, Ts... t)");
+			begin_scope();
+			statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);");
+			end_scope();
+			statement("");
+			statement("template<typename... Ts>");
+			statement("static constexpr ushort build(spvChromaFilter filt, Ts... t)");
+			begin_scope();
+			statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);");
+			end_scope();
+			statement("");
+			statement("template<typename... Ts>");
+			statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)");
+			begin_scope();
+			statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);");
+			end_scope();
+			statement("");
+			statement("template<typename... Ts>");
+			statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)");
+			begin_scope();
+			statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);");
+			end_scope();
+			statement("");
+			statement("template<typename... Ts>");
+			statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)");
+			begin_scope();
+			statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);");
+			end_scope();
+			statement("");
+			statement("template<typename... Ts>");
+			statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)");
+			begin_scope();
+			statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);");
+			end_scope();
+			statement("");
+			statement("template<typename... Ts>");
+			statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)");
+			begin_scope();
+			statement("return (ushort(bpc.value) << bpc_base) | (build(t...) & ~bpc_mask);");
+			end_scope();
+			end_scope_decl();
+			statement("");
+			statement("// A class which can hold up to three textures and a sampler, including");
+			statement("// Y'CbCr conversion info, used to pass combined image-samplers");
+			statement("// dynamically to functions.");
+			statement("template<typename T>");
+			statement("struct spvDynamicImageSampler");
+			begin_scope();
+			statement("texture2d<T> plane0;");
+			statement("texture2d<T> plane1;");
+			statement("texture2d<T> plane2;");
+			statement("sampler samp;");
+			statement("spvYCbCrSampler ycbcr_samp;");
+			statement("uint swizzle = 0;");
+			statement("");
+			if (msl_options.swizzle_texture_samples)
+			{
+				statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, uint sw) thread :");
+				statement("    plane0(tex), samp(samp), swizzle(sw) {}");
+			}
+			else
+			{
+				statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp) thread :");
+				statement("    plane0(tex), samp(samp) {}");
+			}
+			statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, spvYCbCrSampler ycbcr_samp, "
+			          "uint sw) thread :");
+			statement("    plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
+			statement("constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1,");
+			statement("                                 sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
+			statement("    plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
+			statement(
+			    "constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1, texture2d<T> plane2,");
+			statement("                                 sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
+			statement("    plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), "
+			          "swizzle(sw) {}");
+			statement("");
+			// XXX This is really hard to follow... I've left comments to make it a bit easier.
+			statement("template<typename... LodOptions>");
+			statement("vec<T, 4> do_sample(float2 coord, LodOptions... options) const thread");
+			begin_scope();
+			statement("if (!is_null_texture(plane1))");
+			begin_scope();
+			statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||");
+			statement("    ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)");
+			begin_scope();
+			statement("if (!is_null_texture(plane2))");
+			statement("    return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,");
+			statement("                                       spvForward<LodOptions>(options)...);");
+			statement(
+			    "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward<LodOptions>(options)...);");
+			end_scope(); // if (resolution == 444 || chroma_filter == nearest)
+			statement("switch (ycbcr_samp.get_resolution())");
+			begin_scope();
+			statement("case spvFormatResolution::_444: break;");
+			statement("case spvFormatResolution::_422:");
+			begin_scope();
+			statement("switch (ycbcr_samp.get_x_chroma_offset())");
+			begin_scope();
+			statement("case spvXChromaLocation::cosited_even:");
+			statement("    if (!is_null_texture(plane2))");
+			statement("        return spvChromaReconstructLinear422CositedEven(");
+			statement("            plane0, plane1, plane2, samp,");
+			statement("            coord, spvForward<LodOptions>(options)...);");
+			statement("    return spvChromaReconstructLinear422CositedEven(");
+			statement("        plane0, plane1, samp, coord,");
+			statement("        spvForward<LodOptions>(options)...);");
+			statement("case spvXChromaLocation::midpoint:");
+			statement("    if (!is_null_texture(plane2))");
+			statement("        return spvChromaReconstructLinear422Midpoint(");
+			statement("            plane0, plane1, plane2, samp,");
+			statement("            coord, spvForward<LodOptions>(options)...);");
+			statement("    return spvChromaReconstructLinear422Midpoint(");
+			statement("        plane0, plane1, samp, coord,");
+			statement("        spvForward<LodOptions>(options)...);");
+			end_scope(); // switch (x_chroma_offset)
+			end_scope(); // case 422:
+			statement("case spvFormatResolution::_420:");
+			begin_scope();
+			statement("switch (ycbcr_samp.get_x_chroma_offset())");
+			begin_scope();
+			statement("case spvXChromaLocation::cosited_even:");
+			begin_scope();
+			statement("switch (ycbcr_samp.get_y_chroma_offset())");
+			begin_scope();
+			statement("case spvYChromaLocation::cosited_even:");
+			statement("    if (!is_null_texture(plane2))");
+			statement("        return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
+			statement("            plane0, plane1, plane2, samp,");
+			statement("            coord, spvForward<LodOptions>(options)...);");
+			statement("    return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
+			statement("        plane0, plane1, samp, coord,");
+			statement("        spvForward<LodOptions>(options)...);");
+			statement("case spvYChromaLocation::midpoint:");
+			statement("    if (!is_null_texture(plane2))");
+			statement("        return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
+			statement("            plane0, plane1, plane2, samp,");
+			statement("            coord, spvForward<LodOptions>(options)...);");
+			statement("    return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
+			statement("        plane0, plane1, samp, coord,");
+			statement("        spvForward<LodOptions>(options)...);");
+			end_scope(); // switch (y_chroma_offset)
+			end_scope(); // case x::cosited_even:
+			statement("case spvXChromaLocation::midpoint:");
+			begin_scope();
+			statement("switch (ycbcr_samp.get_y_chroma_offset())");
+			begin_scope();
+			statement("case spvYChromaLocation::cosited_even:");
+			statement("    if (!is_null_texture(plane2))");
+			statement("        return spvChromaReconstructLinear420XMidpointYCositedEven(");
+			statement("            plane0, plane1, plane2, samp,");
+			statement("            coord, spvForward<LodOptions>(options)...);");
+			statement("    return spvChromaReconstructLinear420XMidpointYCositedEven(");
+			statement("        plane0, plane1, samp, coord,");
+			statement("        spvForward<LodOptions>(options)...);");
+			statement("case spvYChromaLocation::midpoint:");
+			statement("    if (!is_null_texture(plane2))");
+			statement("        return spvChromaReconstructLinear420XMidpointYMidpoint(");
+			statement("            plane0, plane1, plane2, samp,");
+			statement("            coord, spvForward<LodOptions>(options)...);");
+			statement("    return spvChromaReconstructLinear420XMidpointYMidpoint(");
+			statement("        plane0, plane1, samp, coord,");
+			statement("        spvForward<LodOptions>(options)...);");
+			end_scope(); // switch (y_chroma_offset)
+			end_scope(); // case x::midpoint
+			end_scope(); // switch (x_chroma_offset)
+			end_scope(); // case 420:
+			end_scope(); // switch (resolution)
+			end_scope(); // if (multiplanar)
+			statement("return plane0.sample(samp, coord, spvForward<LodOptions>(options)...);");
+			end_scope(); // do_sample()
+			statement("template <typename... LodOptions>");
+			statement("vec<T, 4> sample(float2 coord, LodOptions... options) const thread");
+			begin_scope();
+			statement(
+			    "vec<T, 4> s = spvTextureSwizzle(do_sample(coord, spvForward<LodOptions>(options)...), swizzle);");
+			statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)");
+			statement("    return s;");
+			statement("");
+			statement("switch (ycbcr_samp.get_ycbcr_range())");
+			begin_scope();
+			statement("case spvYCbCrRange::itu_full:");
+			statement("    s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());");
+			statement("    break;");
+			statement("case spvYCbCrRange::itu_narrow:");
+			statement("    s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());");
+			statement("    break;");
+			end_scope();
+			statement("");
+			statement("switch (ycbcr_samp.get_ycbcr_model())");
+			begin_scope();
+			statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning
+			statement("case spvYCbCrModelConversion::ycbcr_identity:");
+			statement("    return s;");
+			statement("case spvYCbCrModelConversion::ycbcr_bt_709:");
+			statement("    return spvConvertYCbCrBT709(s);");
+			statement("case spvYCbCrModelConversion::ycbcr_bt_601:");
+			statement("    return spvConvertYCbCrBT601(s);");
+			statement("case spvYCbCrModelConversion::ycbcr_bt_2020:");
+			statement("    return spvConvertYCbCrBT2020(s);");
+			end_scope();
+			end_scope();
+			statement("");
+			// Sampler Y'CbCr conversion forbids offsets.
+			statement("vec<T, 4> sample(float2 coord, int2 offset) const thread");
+			begin_scope();
+			if (msl_options.swizzle_texture_samples)
+				statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);");
+			else
+				statement("return plane0.sample(samp, coord, offset);");
+			end_scope();
+			statement("template<typename lod_options>");
+			statement("vec<T, 4> sample(float2 coord, lod_options options, int2 offset) const thread");
+			begin_scope();
+			if (msl_options.swizzle_texture_samples)
+				statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);");
+			else
+				statement("return plane0.sample(samp, coord, options, offset);");
+			end_scope();
+			statement("#if __HAVE_MIN_LOD_CLAMP__");
+			statement("vec<T, 4> sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread");
+			begin_scope();
+			statement("return plane0.sample(samp, coord, b, min_lod, offset);");
+			end_scope();
+			statement(
+			    "vec<T, 4> sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread");
+			begin_scope();
+			statement("return plane0.sample(samp, coord, grad, min_lod, offset);");
+			end_scope();
+			statement("#endif");
+			statement("");
+			// Y'CbCr conversion forbids all operations but sampling.
+			statement("vec<T, 4> read(uint2 coord, uint lod = 0) const thread");
+			begin_scope();
+			statement("return plane0.read(coord, lod);");
+			end_scope();
+			statement("");
+			statement("vec<T, 4> gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread");
+			begin_scope();
+			if (msl_options.swizzle_texture_samples)
+				statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);");
+			else
+				statement("return plane0.gather(samp, coord, offset, c);");
+			end_scope();
+			end_scope_decl();
+			statement("");
 
 		default:
 			break;
@@ -2938,10 +4567,27 @@ void CompilerMSL::emit_resources()
 void CompilerMSL::emit_specialization_constants_and_structs()
 {
 	SpecializationConstant wg_x, wg_y, wg_z;
-	uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
 	bool emitted = false;
 
 	unordered_set<uint32_t> declared_structs;
+	unordered_set<uint32_t> aligned_structs;
+
+	// First, we need to deal with scalar block layout.
+	// A struct may have to be placed at an alignment which does not match the innate alignment of the struct itself.
+	// If any such placement exists for a struct, we must force all members of that struct to become packed_ types.
+	// This makes the struct alignment as small as physically possible.
+	// When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types.
+	ir.for_each_typed_id<SPIRType>([&](uint32_t type_id, const SPIRType &type) {
+		if (type.basetype == SPIRType::Struct &&
+		    has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
+			mark_scalar_layout_structs(type);
+	});
+
+	// Very particular use of the soft loop lock.
+	// align_struct may need to create custom types on the fly, but we don't care about
+	// these types for the purpose of iterating over them in ir.ids_for_type and friends.
+	auto loop_lock = ir.create_loop_soft_lock();
 
 	for (auto &id_ : ir.ids_for_constant_or_type)
 	{
@@ -3015,7 +4661,7 @@ void CompilerMSL::emit_specialization_constants_and_structs()
 			// Output non-builtin interface structs. These include local function structs
 			// and structs nested within uniform and read-write buffers.
 			auto &type = id.get<SPIRType>();
-			uint32_t type_id = type.self;
+			TypeID type_id = type.self;
 
 			bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty();
 			bool is_block =
@@ -3043,8 +4689,8 @@ void CompilerMSL::emit_specialization_constants_and_structs()
 
 				declared_structs.insert(type_id);
 
-				if (has_extended_decoration(type_id, SPIRVCrossDecorationPacked))
-					align_struct(type);
+				if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
+					align_struct(type, aligned_structs);
 
 				// Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc.
 				emit_struct(get<SPIRType>(type_id));
@@ -3154,7 +4800,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
 						}
 						else if (is_array(mbr_type))
 						{
-							for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++)
+							for (uint32_t k = 0; k < to_array_size_literal(mbr_type, 0); k++, index++)
 							{
 								set<SPIRConstant>(const_mbr_id, type_id, index, false);
 								auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr,
@@ -3183,7 +4829,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
 				else // Must be an array
 				{
 					assert(is_array(*type));
-					for (uint32_t j = 0; j < type->array[0]; j++, index++)
+					for (uint32_t j = 0; j < to_array_size_literal(*type, 0); j++, index++)
 					{
 						set<SPIRConstant>(const_mbr_id, type_id, index, false);
 						auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true);
@@ -3230,9 +4876,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
 
 		// Mark the result as being packed if necessary.
 		if (meta.storage_is_packed)
-			set_extended_decoration(ops[1], SPIRVCrossDecorationPacked);
-		if (meta.storage_packed_type != 0)
-			set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type);
+			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
+		if (meta.storage_physical_type != 0)
+			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
 		if (meta.storage_is_invariant)
 			set_decoration(ops[1], DecorationInvariant);
 
@@ -3251,7 +4897,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
 	// expression so we don't try to dereference it as a variable pointer.
 	// Don't do this if the index is a constant 1, though. We need to drop stores
 	// to that one.
-	auto *m = ir.find_meta(var ? var->self : 0);
+	auto *m = ir.find_meta(var ? var->self : ID(0));
 	if (get_execution_model() == ExecutionModelTessellationControl && var && m &&
 	    m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles))
 	{
@@ -3436,21 +5082,36 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 
 	// Bitfield
 	case OpBitFieldInsert:
-		MSL_QFOP(insert_bits);
+	{
+		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt);
 		break;
+	}
 
 	case OpBitFieldSExtract:
+	{
+		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type,
+		                                SPIRType::UInt, SPIRType::UInt);
+		break;
+	}
+
 	case OpBitFieldUExtract:
-		MSL_TFOP(extract_bits);
+	{
+		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type,
+		                                SPIRType::UInt, SPIRType::UInt);
 		break;
+	}
 
 	case OpBitReverse:
+		// BitReverse does not have issues with sign since result type must match input type.
 		MSL_UFOP(reverse_bits);
 		break;
 
 	case OpBitCount:
-		MSL_UFOP(popcount);
+	{
+		auto basetype = expression_type(ops[2]).basetype;
+		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype);
 		break;
+	}
 
 	case OpFRem:
 		MSL_BFOP(fmod);
@@ -3628,11 +5289,11 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		auto store_type = texel_type;
 		store_type.vecsize = 4;
 
-		statement(join(
-		    to_expression(img_id), ".write(", remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)),
-		    ", ",
-		    to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, &forward),
-		    ");"));
+		statement(join(to_expression(img_id), ".write(",
+		               remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ",
+		               to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0,
+		                                0, &forward),
+		               ");"));
 
 		if (p_var && variable_storage_is_aliased(*p_var))
 			flush_all_aliased_variables();
@@ -3686,7 +5347,30 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	}
 
 	case OpImageQueryLod:
-		SPIRV_CROSS_THROW("MSL does not support textureQueryLod().");
+	{
+		if (!msl_options.supports_msl_version(2, 2))
+			SPIRV_CROSS_THROW("ImageQueryLod is only supported on MSL 2.2 and up.");
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t image_id = ops[2];
+		uint32_t coord_id = ops[3];
+		emit_uninitialized_temporary_expression(result_type, id);
+
+		auto sampler_expr = to_sampler_expression(image_id);
+		auto *combined = maybe_get<SPIRCombinedImageSampler>(image_id);
+		auto image_expr = combined ? to_expression(combined->image) : to_expression(image_id);
+
+		// TODO: It is unclear if calculate_clamped_lod also conditionally rounds
+		// the reported LOD based on the sampler. NEAREST miplevel should
+		// round the LOD, but LINEAR miplevel should not round.
+		// Let's hope this does not become an issue ...
+		statement(to_expression(id), ".x = ", image_expr, ".calculate_clamped_lod(", sampler_expr, ", ",
+		          to_expression(coord_id), ");");
+		statement(to_expression(id), ".y = ", image_expr, ".calculate_unclamped_lod(", sampler_expr, ", ",
+		          to_expression(coord_id), ");");
+		register_control_dependent_expression(id);
+		break;
+	}
 
 #define MSL_ImgQry(qrytype)                                                                 \
 	do                                                                                      \
@@ -3723,10 +5407,14 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		}
 		else
 		{
-			auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
 			auto *var = maybe_get_backing_variable(ops[2]);
+			SPIRExpression *e;
+			if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler))
+				e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true);
+			else
+				e = &emit_op(result_type, id, to_expression(ops[2]), true, true);
 			if (var)
-				e.loaded_from = var->self;
+				e->loaded_from = var->self;
 		}
 		break;
 	}
@@ -3801,36 +5489,31 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 			emit_barrier(ops[0], ops[1], ops[2]);
 		break;
 
-	case OpVectorTimesMatrix:
-	case OpMatrixTimesVector:
+	case OpOuterProduct:
 	{
-		// If the matrix needs transpose and it is square or packed, just flip the multiply order.
-		uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
-		auto *e = maybe_get<SPIRExpression>(mtx_id);
-		auto &t = expression_type(mtx_id);
-		bool is_packed = has_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
-		if (e && e->need_transpose && (t.columns == t.vecsize || is_packed))
-		{
-			e->need_transpose = false;
-
-			// This is important for matrices. Packed matrices
-			// are generally transposed, so unpacking using a constructor argument
-			// will result in an error.
-			// The simplest solution for now is to just avoid unpacking the matrix in this operation.
-			unset_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2];
+		uint32_t b = ops[3];
 
-			emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
-			if (is_packed)
-				set_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
-			e->need_transpose = true;
+		auto &type = get<SPIRType>(result_type);
+		string expr = type_to_glsl_constructor(type);
+		expr += "(";
+		for (uint32_t col = 0; col < type.columns; col++)
+		{
+			expr += to_enclosed_expression(a);
+			expr += " * ";
+			expr += to_extract_component_expression(b, col);
+			if (col + 1 < type.columns)
+				expr += ", ";
 		}
-		else
-			MSL_BOP(*);
+		expr += ")";
+		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
 		break;
 	}
 
-		// OpOuterProduct
-
 	case OpIAddCarry:
 	case OpISubBorrow:
 	{
@@ -3838,10 +5521,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_id = ops[1];
 		uint32_t op0 = ops[2];
 		uint32_t op1 = ops[3];
-		forced_temporaries.insert(result_id);
 		auto &type = get<SPIRType>(result_type);
-		statement(variable_decl(type, to_name(result_id)), ";");
-		set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
+		emit_uninitialized_temporary_expression(result_type, result_id);
 
 		auto &res_type = get<SPIRType>(type.member_types[1]);
 		if (opcode == OpIAddCarry)
@@ -3870,10 +5551,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_id = ops[1];
 		uint32_t op0 = ops[2];
 		uint32_t op1 = ops[3];
-		forced_temporaries.insert(result_id);
 		auto &type = get<SPIRType>(result_type);
-		statement(variable_decl(type, to_name(result_id)), ";");
-		set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
+		emit_uninitialized_temporary_expression(result_type, result_id);
 
 		statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " * ",
 		          to_enclosed_expression(op1), ";");
@@ -3882,6 +5561,91 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpArrayLength:
+	{
+		auto &type = expression_type(ops[2]);
+		uint32_t offset = type_struct_member_offset(type, ops[3]);
+		uint32_t stride = type_struct_member_array_stride(type, ops[3]);
+
+		auto expr = join("(", to_buffer_size_expression(ops[2]), " - ", offset, ") / ", stride);
+		emit_op(ops[0], ops[1], expr, true);
+		break;
+	}
+
+	// SPV_INTEL_shader_integer_functions2
+	case OpUCountLeadingZerosINTEL:
+		MSL_UFOP(clz);
+		break;
+
+	case OpUCountTrailingZerosINTEL:
+		MSL_UFOP(ctz);
+		break;
+
+	case OpAbsISubINTEL:
+	case OpAbsUSubINTEL:
+		MSL_BFOP(absdiff);
+		break;
+
+	case OpIAddSatINTEL:
+	case OpUAddSatINTEL:
+		MSL_BFOP(addsat);
+		break;
+
+	case OpIAverageINTEL:
+	case OpUAverageINTEL:
+		MSL_BFOP(hadd);
+		break;
+
+	case OpIAverageRoundedINTEL:
+	case OpUAverageRoundedINTEL:
+		MSL_BFOP(rhadd);
+		break;
+
+	case OpISubSatINTEL:
+	case OpUSubSatINTEL:
+		MSL_BFOP(subsat);
+		break;
+
+	case OpIMul32x16INTEL:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2], b = ops[3];
+		bool forward = should_forward(a) && should_forward(b);
+		emit_op(result_type, id, join("int(short(", to_expression(a), ")) * int(short(", to_expression(b), "))"),
+		        forward);
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
+		break;
+	}
+
+	case OpUMul32x16INTEL:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2], b = ops[3];
+		bool forward = should_forward(a) && should_forward(b);
+		emit_op(result_type, id, join("uint(ushort(", to_expression(a), ")) * uint(ushort(", to_expression(b), "))"),
+		        forward);
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
+		break;
+	}
+
+	case OpIsHelperInvocationEXT:
+		if (msl_options.is_ios())
+			SPIRV_CROSS_THROW("simd_is_helper_thread() is only supported on macOS.");
+		else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
+			SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS.");
+		emit_op(ops[0], ops[1], "simd_is_helper_thread()", false);
+		break;
+
+	case OpBeginInvocationInterlockEXT:
+	case OpEndInvocationInterlockEXT:
+		if (!msl_options.supports_msl_version(2, 0))
+			SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
+		break; // Nothing to do in the body
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -3895,33 +5659,70 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
 	if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl)
 		return;
 
-	string bar_stmt = "threadgroup_barrier(mem_flags::";
+	uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : uint32_t(ScopeInvocation);
+	uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : uint32_t(ScopeInvocation);
+	// Use the wider of the two scopes (smaller value)
+	exe_scope = min(exe_scope, mem_scope);
+
+	string bar_stmt;
+	if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2))
+		bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier";
+	else
+		bar_stmt = "threadgroup_barrier";
+	bar_stmt += "(";
 
 	uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone);
 
-	if (get_execution_model() == ExecutionModelTessellationControl)
+	// Use the | operator to combine flags if we can.
+	if (msl_options.supports_msl_version(1, 2))
+	{
+		string mem_flags = "";
 		// For tesc shaders, this also affects objects in the Output storage class.
 		// Since in Metal, these are placed in a device buffer, we have to sync device memory here.
-		bar_stmt += "mem_device";
-	else if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask)
-		bar_stmt += "mem_device";
-	else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
-	                    MemorySemanticsAtomicCounterMemoryMask))
-		bar_stmt += "mem_threadgroup";
-	else if (mem_sem & MemorySemanticsImageMemoryMask)
-		bar_stmt += "mem_texture";
+		if (get_execution_model() == ExecutionModelTessellationControl ||
+		    (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)))
+			mem_flags += "mem_flags::mem_device";
+		if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
+		               MemorySemanticsAtomicCounterMemoryMask))
+		{
+			if (!mem_flags.empty())
+				mem_flags += " | ";
+			mem_flags += "mem_flags::mem_threadgroup";
+		}
+		if (mem_sem & MemorySemanticsImageMemoryMask)
+		{
+			if (!mem_flags.empty())
+				mem_flags += " | ";
+			mem_flags += "mem_flags::mem_texture";
+		}
+
+		if (mem_flags.empty())
+			mem_flags = "mem_flags::mem_none";
+
+		bar_stmt += mem_flags;
+	}
 	else
-		bar_stmt += "mem_none";
+	{
+		if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) &&
+		    (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
+		                MemorySemanticsAtomicCounterMemoryMask)))
+			bar_stmt += "mem_flags::mem_device_and_threadgroup";
+		else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))
+			bar_stmt += "mem_flags::mem_device";
+		else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
+		                    MemorySemanticsAtomicCounterMemoryMask))
+			bar_stmt += "mem_flags::mem_threadgroup";
+		else if (mem_sem & MemorySemanticsImageMemoryMask)
+			bar_stmt += "mem_flags::mem_texture";
+		else
+			bar_stmt += "mem_flags::mem_none";
+	}
 
 	if (msl_options.is_ios() && (msl_options.supports_msl_version(2) && !msl_options.supports_msl_version(2, 1)))
 	{
 		bar_stmt += ", ";
 
-		// Use the wider of the two scopes (smaller value)
-		uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : uint32_t(ScopeInvocation);
-		uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : uint32_t(ScopeInvocation);
-		uint32_t scope = min(exe_scope, mem_scope);
-		switch (scope)
+		switch (mem_scope)
 		{
 		case ScopeCrossDevice:
 		case ScopeDevice:
@@ -3949,7 +5750,8 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
 	flush_all_active_variables();
 }
 
-void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
+void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage,
+                                  StorageClass rhs_storage)
 {
 	// Assignment from an array initializer is fine.
 	auto &type = expression_type(rhs_id);
@@ -3977,21 +5779,32 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
 		if (type.array.size() > SPVFuncImplArrayCopyMultidimMax)
 			SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays.");
 		auto func = static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size());
-		if (spv_function_implementations.count(func) == 0)
-		{
-			spv_function_implementations.insert(func);
-			suppress_missing_prototypes = true;
-			force_recompile();
-		}
-	}
-	else if (spv_function_implementations.count(SPVFuncImplArrayCopy) == 0)
-	{
-		spv_function_implementations.insert(SPVFuncImplArrayCopy);
-		suppress_missing_prototypes = true;
-		force_recompile();
+		add_spv_func_and_recompile(func);
 	}
+	else
+		add_spv_func_and_recompile(SPVFuncImplArrayCopy);
+
+	bool lhs_thread = lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction ||
+	                  lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate;
+	bool rhs_thread = rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction ||
+	                  rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate;
+
+	const char *tag = nullptr;
+	if (lhs_thread && is_constant)
+		tag = "FromConstantToStack";
+	else if (lhs_storage == StorageClassWorkgroup && is_constant)
+		tag = "FromConstantToThreadGroup";
+	else if (lhs_thread && rhs_thread)
+		tag = "FromStackToStack";
+	else if (lhs_storage == StorageClassWorkgroup && rhs_thread)
+		tag = "FromStackToThreadGroup";
+	else if (lhs_thread && rhs_storage == StorageClassWorkgroup)
+		tag = "FromThreadGroupToStack";
+	else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
+		tag = "FromThreadGroupToThreadGroup";
+	else
+		SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");
 
-	const char *tag = is_constant ? "FromConstant" : "FromStack";
 	statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");");
 }
 
@@ -4028,7 +5841,8 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
 	if (p_v_lhs)
 		flush_variable_declaration(p_v_lhs->self);
 
-	emit_array_copy(to_expression(id_lhs), id_rhs);
+	emit_array_copy(to_expression(id_lhs), id_rhs, get_backing_variable_storage(id_lhs),
+	                get_backing_variable_storage(id_rhs));
 	register_write(id_lhs);
 
 	return true;
@@ -4039,12 +5853,10 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
                                       uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1,
                                       bool op1_is_pointer, bool op1_is_literal, uint32_t op2)
 {
-	forced_temporaries.insert(result_id);
-
 	string exp = string(op) + "(";
 
 	auto &type = get_pointee_type(expression_type(obj));
-	exp += "(volatile ";
+	exp += "(";
 	auto *var = maybe_get_backing_variable(obj);
 	if (!var)
 		SPIRV_CROSS_THROW("No backing variable for atomic operation.");
@@ -4079,12 +5891,11 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 		// the CAS loop, otherwise it will loop infinitely, with the comparison test always failing.
 		// The function updates the comparitor value from the memory value, so the additional
 		// comparison test evaluates the memory value against the expected value.
-		statement(variable_decl(type, to_name(result_id)), ";");
+		emit_uninitialized_temporary_expression(result_type, result_id);
 		statement("do");
 		begin_scope();
 		statement(to_name(result_id), " = ", to_expression(op1), ";");
 		end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")"));
-		set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
 	}
 	else
 	{
@@ -4138,12 +5949,20 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		emit_unary_func_op(result_type, id, args[0], "rint");
 		break;
 
+	case GLSLstd450FindILsb:
+	{
+		// In this template version of findLSB, we return T.
+		auto basetype = expression_type(args[0]).basetype;
+		emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype);
+		break;
+	}
+
 	case GLSLstd450FindSMsb:
-		emit_unary_func_op_cast(result_type, id, args[0], "findSMSB", int_type, int_type);
+		emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type);
 		break;
 
 	case GLSLstd450FindUMsb:
-		emit_unary_func_op_cast(result_type, id, args[0], "findUMSB", uint_type, uint_type);
+		emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type);
 		break;
 
 	case GLSLstd450PackSnorm4x8:
@@ -4267,12 +6086,130 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		//        GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
 		//        GLSLstd450InterpolateAtOffset
 
+	case GLSLstd450Distance:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Equivalent to length(a - b) -> abs(a - b).
+			emit_op(result_type, id,
+			        join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"),
+			        should_forward(args[0]) && should_forward(args[1]));
+			inherit_expression_dependencies(id, args[0]);
+			inherit_expression_dependencies(id, args[1]);
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Length:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Equivalent to abs().
+			emit_unary_func_op(result_type, id, args[0], "abs");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Normalize:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Returns -1 or 1 for valid input, sign() does the job.
+			emit_unary_func_op(result_type, id, args[0], "sign");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Reflect:
+		if (get<SPIRType>(result_type).vecsize == 1)
+			emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Refract:
+		if (get<SPIRType>(result_type).vecsize == 1)
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450FaceForward:
+		if (get<SPIRType>(result_type).vecsize == 1)
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward");
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Modf:
+	case GLSLstd450Frexp:
+	{
+		// Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary.
+		auto *ptr = maybe_get<SPIRExpression>(args[1]);
+		if (ptr && ptr->access_chain && is_scalar(expression_type(args[1])))
+		{
+			register_call_out_argument(args[1]);
+			forced_temporaries.insert(id);
+
+			// Need to create temporaries and copy over to access chain after.
+			// We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ...
+			uint32_t &tmp_id = extra_sub_expressions[id];
+			if (!tmp_id)
+				tmp_id = ir.increase_bound_by(1);
+
+			uint32_t tmp_type_id = get_pointee_type_id(ptr->expression_type);
+			emit_uninitialized_temporary_expression(tmp_type_id, tmp_id);
+			emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? "modf" : "frexp");
+			statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+	}
+
 	default:
 		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
 		break;
 	}
 }
 
+void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
+                                                        const uint32_t *args, uint32_t count)
+{
+	enum AMDShaderTrinaryMinMax
+	{
+		FMin3AMD = 1,
+		UMin3AMD = 2,
+		SMin3AMD = 3,
+		FMax3AMD = 4,
+		UMax3AMD = 5,
+		SMax3AMD = 6,
+		FMid3AMD = 7,
+		UMid3AMD = 8,
+		SMid3AMD = 9
+	};
+
+	if (!msl_options.supports_msl_version(2, 1))
+		SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1.");
+
+	auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
+
+	switch (op)
+	{
+	case FMid3AMD:
+	case UMid3AMD:
+	case SMid3AMD:
+		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3");
+		break;
+	default:
+		CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count);
+		break;
+	}
+}
+
 // Emit a structure declaration for the specified interface variable.
 void CompilerMSL::emit_interface_block(uint32_t ib_var_id)
 {
@@ -4293,9 +6230,10 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
 		add_function_overload(func);
 
 	local_variable_names = resource_names;
-	string decl;
 
-	processing_entry_point = (func.self == ir.default_entry_point);
+	processing_entry_point = func.self == ir.default_entry_point;
+
+	string decl = processing_entry_point ? "" : "inline ";
 
 	auto &type = get<SPIRType>(func.return_type);
 
@@ -4306,7 +6244,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
 	else
 	{
 		// We cannot return arrays in MSL, so "return" through an out variable.
-		decl = "void";
+		decl += "void";
 	}
 
 	decl += " ";
@@ -4338,7 +6276,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
 		for (auto var_id : vars_needing_early_declaration)
 		{
 			auto &ed_var = get<SPIRVariable>(var_id);
-			uint32_t &initializer = ed_var.initializer;
+			ID &initializer = ed_var.initializer;
 			if (!initializer)
 				initializer = ir.increase_bound_by(1);
 
@@ -4367,16 +6305,36 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
 
 		decl += argument_decl(arg);
 
-		// Manufacture automatic sampler arg for SampledImage texture
+		bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
+
 		auto &arg_type = get<SPIRType>(arg.type);
-		if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
-			decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id));
+		if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler)
+		{
+			// Manufacture automatic plane args for multiplanar texture
+			uint32_t planes = 1;
+			if (auto *constexpr_sampler = find_constexpr_sampler(name_id))
+				if (constexpr_sampler->ycbcr_conversion_enable)
+					planes = constexpr_sampler->planes;
+			for (uint32_t i = 1; i < planes; i++)
+				decl += join(", ", argument_decl(arg), plane_name_suffix, i);
+
+			// Manufacture automatic sampler arg for SampledImage texture
+			if (arg_type.image.dim != DimBuffer)
+				decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id));
+		}
 
 		// Manufacture automatic swizzle arg.
-		if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type))
+		if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) &&
+		    !is_dynamic_img_sampler)
 		{
 			bool arg_is_array = !arg_type.array.empty();
-			decl += join(", constant uint32_t", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id));
+			decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id));
+		}
+
+		if (buffers_requiring_array_length.count(name_id))
+		{
+			bool arg_is_array = !arg_type.array.empty();
+			decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id));
 		}
 
 		if (&arg != &func.arguments.back())
@@ -4387,85 +6345,237 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
 	statement(decl);
 }
 
+static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler)
+{
+	// For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images
+	// use implicit reconstruction.
+	return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1;
+}
+
 // Returns the texture sampling function string for the specified image and sampling characteristics.
-string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool,
-                                     bool has_offset, bool, bool has_dref, uint32_t)
+string CompilerMSL::to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool,
+                                     bool, bool, bool has_dref, uint32_t, uint32_t)
 {
+	const MSLConstexprSampler *constexpr_sampler = nullptr;
+	bool is_dynamic_img_sampler = false;
+	if (auto *var = maybe_get_backing_variable(img))
+	{
+		constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
+		is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
+	}
+
 	// Special-case gather. We have to alter the component being looked up
 	// in the swizzle case.
-	if (msl_options.swizzle_texture_samples && is_gather)
+	if (msl_options.swizzle_texture_samples && is_gather && !is_dynamic_img_sampler &&
+	    (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
 	{
-		string fname = imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
-		fname += "<" + type_to_glsl(get<SPIRType>(imgtype.image.type)) + ", metal::" + type_to_glsl(imgtype);
-		// Add the arg types ourselves. Yes, this sucks, but Clang can't
-		// deduce template pack parameters in the middle of an argument list.
-		switch (imgtype.image.dim)
-		{
-		case Dim2D:
-			fname += ", float2";
-			if (imgtype.image.arrayed)
-				fname += ", uint";
-			if (imgtype.image.depth)
-				fname += ", float";
-			if (!imgtype.image.depth || has_offset)
-				fname += ", int2";
-			break;
-		case DimCube:
-			fname += ", float3";
-			if (imgtype.image.arrayed)
-				fname += ", uint";
-			if (imgtype.image.depth)
-				fname += ", float";
-			break;
-		default:
-			SPIRV_CROSS_THROW("Invalid texture dimension for gather op.");
-		}
-		fname += ">";
-		return fname;
+		add_spv_func_and_recompile(imgtype.image.depth ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle);
+		return imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
 	}
 
 	auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
 
 	// Texture reference
-	string fname = to_expression(combined ? combined->image : img) + ".";
-	if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype))
-		fname = "spvTextureSwizzle(" + fname;
-
-	// Texture function and sampler
-	if (is_fetch)
-		fname += "read";
-	else if (is_gather)
-		fname += "gather";
+	string fname;
+	if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler)
+	{
+		if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3)
+			SPIRV_CROSS_THROW("Unhandled number of color image planes!");
+		// 444 images aren't downsampled, so we don't need to do linear filtering.
+		if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 ||
+		    constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST)
+		{
+			if (constexpr_sampler->planes == 2)
+				add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane);
+			else
+				add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane);
+			fname = "spvChromaReconstructNearest";
+		}
+		else // Linear with a downsampled format
+		{
+			fname = "spvChromaReconstructLinear";
+			switch (constexpr_sampler->resolution)
+			{
+			case MSL_FORMAT_RESOLUTION_444:
+				assert(false);
+				break; // not reached
+			case MSL_FORMAT_RESOLUTION_422:
+				switch (constexpr_sampler->x_chroma_offset)
+				{
+				case MSL_CHROMA_LOCATION_COSITED_EVEN:
+					if (constexpr_sampler->planes == 2)
+						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane);
+					else
+						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane);
+					fname += "422CositedEven";
+					break;
+				case MSL_CHROMA_LOCATION_MIDPOINT:
+					if (constexpr_sampler->planes == 2)
+						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane);
+					else
+						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane);
+					fname += "422Midpoint";
+					break;
+				default:
+					SPIRV_CROSS_THROW("Invalid chroma location.");
+				}
+				break;
+			case MSL_FORMAT_RESOLUTION_420:
+				fname += "420";
+				switch (constexpr_sampler->x_chroma_offset)
+				{
+				case MSL_CHROMA_LOCATION_COSITED_EVEN:
+					switch (constexpr_sampler->y_chroma_offset)
+					{
+					case MSL_CHROMA_LOCATION_COSITED_EVEN:
+						if (constexpr_sampler->planes == 2)
+							add_spv_func_and_recompile(
+							    SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane);
+						else
+							add_spv_func_and_recompile(
+							    SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane);
+						fname += "XCositedEvenYCositedEven";
+						break;
+					case MSL_CHROMA_LOCATION_MIDPOINT:
+						if (constexpr_sampler->planes == 2)
+							add_spv_func_and_recompile(
+							    SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane);
+						else
+							add_spv_func_and_recompile(
+							    SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane);
+						fname += "XCositedEvenYMidpoint";
+						break;
+					default:
+						SPIRV_CROSS_THROW("Invalid Y chroma location.");
+					}
+					break;
+				case MSL_CHROMA_LOCATION_MIDPOINT:
+					switch (constexpr_sampler->y_chroma_offset)
+					{
+					case MSL_CHROMA_LOCATION_COSITED_EVEN:
+						if (constexpr_sampler->planes == 2)
+							add_spv_func_and_recompile(
+							    SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane);
+						else
+							add_spv_func_and_recompile(
+							    SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane);
+						fname += "XMidpointYCositedEven";
+						break;
+					case MSL_CHROMA_LOCATION_MIDPOINT:
+						if (constexpr_sampler->planes == 2)
+							add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane);
+						else
+							add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane);
+						fname += "XMidpointYMidpoint";
+						break;
+					default:
+						SPIRV_CROSS_THROW("Invalid Y chroma location.");
+					}
+					break;
+				default:
+					SPIRV_CROSS_THROW("Invalid X chroma location.");
+				}
+				break;
+			default:
+				SPIRV_CROSS_THROW("Invalid format resolution.");
+			}
+		}
+	}
 	else
-		fname += "sample";
+	{
+		fname = to_expression(combined ? combined->image : img) + ".";
 
-	if (has_dref)
-		fname += "_compare";
+		// Texture function and sampler
+		if (is_fetch)
+			fname += "read";
+		else if (is_gather)
+			fname += "gather";
+		else
+			fname += "sample";
+
+		if (has_dref)
+			fname += "_compare";
+	}
 
 	return fname;
 }
 
+string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
+{
+	SPIRType t;
+	t.basetype = SPIRType::Float;
+	t.vecsize = components;
+	t.columns = 1;
+	return join(type_to_glsl_constructor(t), "(", expr, ")");
+}
+
+static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
+{
+	// Double is not supported to begin with, but doesn't hurt to check for completion.
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
+}
+
 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
-string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
-                                     uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
-                                     uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp,
-                                     uint32_t sample, bool *p_forward)
+string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+                                     bool is_proj, uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x,
+                                     uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias,
+                                     uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward)
 {
+	const MSLConstexprSampler *constexpr_sampler = nullptr;
+	bool is_dynamic_img_sampler = false;
+	if (auto *var = maybe_get_backing_variable(img))
+	{
+		constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
+		is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
+	}
+
 	string farg_str;
-	if (!is_fetch)
-		farg_str += to_sampler_expression(img);
+	bool forward = true;
 
-	if (msl_options.swizzle_texture_samples && is_gather)
+	if (!is_dynamic_img_sampler)
 	{
-		if (!farg_str.empty())
-			farg_str += ", ";
+		// Texture reference (for some cases)
+		if (needs_chroma_reconstruction(constexpr_sampler))
+		{
+			// Multiplanar images need two or three textures.
+			farg_str += to_expression(img);
+			for (uint32_t i = 1; i < constexpr_sampler->planes; i++)
+				farg_str += join(", ", to_expression(img), plane_name_suffix, i);
+		}
+		else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
+		         msl_options.swizzle_texture_samples && is_gather)
+		{
+			auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
+			farg_str += to_expression(combined ? combined->image : img);
+		}
 
-		auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
-		farg_str += to_expression(combined ? combined->image : img);
+		// Sampler reference
+		if (!is_fetch)
+		{
+			if (!farg_str.empty())
+				farg_str += ", ";
+			farg_str += to_sampler_expression(img);
+		}
+
+		if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
+		    msl_options.swizzle_texture_samples && is_gather)
+		{
+			// Add the swizzle constant from the swizzle buffer.
+			farg_str += ", " + to_swizzle_expression(img);
+			used_swizzle_buffer = true;
+		}
+
+		// Swizzled gather puts the component before the other args, to allow template
+		// deduction to work.
+		if (comp && msl_options.swizzle_texture_samples)
+		{
+			forward = should_forward(comp);
+			farg_str += ", " + to_component_argument(comp);
+		}
 	}
 
 	// Texture coordinates
-	bool forward = should_forward(coord);
+	forward = forward && should_forward(coord);
 	auto coord_expr = to_enclosed_expression(coord);
 	auto &coord_type = expression_type(coord);
 	bool coord_is_fp = type_is_floating_point(coord_type);
@@ -4483,6 +6593,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 		if (is_fetch)
 			tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 1);
 
 		alt_coord_component = 1;
 		break;
@@ -4518,6 +6630,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 		if (is_fetch)
 			tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 2);
 
 		alt_coord_component = 2;
 		break;
@@ -4528,6 +6642,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 		if (is_fetch)
 			tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);
 
 		alt_coord_component = 3;
 		break;
@@ -4545,6 +6661,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 				tex_coords = enclose_expression(tex_coords) + ".xyz";
 		}
 
+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);
+
 		alt_coord_component = 3;
 		break;
 
@@ -4575,7 +6694,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 	// If projection, use alt coord as divisor
 	if (is_proj)
-		tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	{
+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1);
+		else
+			tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	}
 
 	if (!farg_str.empty())
 		farg_str += ", ";
@@ -4609,11 +6733,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 		forward = forward && should_forward(dref);
 		farg_str += ", ";
 
+		auto &dref_type = expression_type(dref);
+
+		string dref_expr;
 		if (is_proj)
-			farg_str +=
-			    to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component);
+			dref_expr =
+			    join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component));
 		else
-			farg_str += to_expression(dref);
+			dref_expr = to_expression(dref);
+
+		if (sampling_type_needs_f32_conversion(dref_type))
+			dref_expr = convert_to_f32(dref_expr, 1);
+
+		farg_str += dref_expr;
 
 		if (msl_options.is_macos() && (grad_x || grad_y))
 		{
@@ -4706,6 +6838,20 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 		farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")";
 	}
 
+	if (minlod)
+	{
+		if (msl_options.is_macos())
+		{
+			if (!msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up on macOS.");
+		}
+		else if (msl_options.is_ios())
+			SPIRV_CROSS_THROW("min_lod_clamp() is not supported on iOS.");
+
+		forward = forward && should_forward(minlod);
+		farg_str += ", min_lod_clamp(" + to_expression(minlod) + ")";
+	}
+
 	// Add offsets
 	string offset_expr;
 	if (coffset && !is_fetch)
@@ -4748,25 +6894,20 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 		if (imgtype.image.dim == Dim2D && offset_expr.empty())
 			farg_str += ", int2(0)";
 
-		forward = forward && should_forward(comp);
-		farg_str += ", " + to_component_argument(comp);
+		if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler)
+		{
+			forward = forward && should_forward(comp);
+			farg_str += ", " + to_component_argument(comp);
+		}
 	}
 
 	if (sample)
 	{
+		forward = forward && should_forward(sample);
 		farg_str += ", ";
 		farg_str += to_expression(sample);
 	}
 
-	if (msl_options.swizzle_texture_samples && is_sampled_image_type(imgtype))
-	{
-		// Add the swizzle constant from the swizzle buffer.
-		if (!is_gather)
-			farg_str += ")";
-		farg_str += ", " + to_swizzle_expression(img);
-		used_aux_buffer = true;
-	}
-
 	*p_forward = forward;
 
 	return farg_str;
@@ -4813,12 +6954,216 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id
 	set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
 }
 
+string CompilerMSL::to_texture_op(const Instruction &i, bool *forward, SmallVector<uint32_t> &inherited_expressions)
+{
+	auto *ops = stream(i);
+	uint32_t result_type_id = ops[0];
+	uint32_t img = ops[2];
+	auto &result_type = get<SPIRType>(result_type_id);
+	auto op = static_cast<Op>(i.op);
+	bool is_gather = (op == OpImageGather || op == OpImageDrefGather);
+
+	// Bypass pointers because we need the real image struct
+	auto &type = expression_type(img);
+	auto &imgtype = get<SPIRType>(type.self);
+
+	const MSLConstexprSampler *constexpr_sampler = nullptr;
+	bool is_dynamic_img_sampler = false;
+	if (auto *var = maybe_get_backing_variable(img))
+	{
+		constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
+		is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
+	}
+
+	string expr;
+	if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
+	{
+		// If this needs sampler Y'CbCr conversion, we need to do some additional
+		// processing.
+		switch (constexpr_sampler->ycbcr_model)
+		{
+		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
+			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709);
+			expr += "spvConvertYCbCrBT709(";
+			break;
+		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
+			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601);
+			expr += "spvConvertYCbCrBT601(";
+			break;
+		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
+			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020);
+			expr += "spvConvertYCbCrBT2020(";
+			break;
+		default:
+			SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
+		}
+
+		if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+		{
+			switch (constexpr_sampler->ycbcr_range)
+			{
+			case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL:
+				add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange);
+				expr += "spvExpandITUFullRange(";
+				break;
+			case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW:
+				add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange);
+				expr += "spvExpandITUNarrowRange(";
+				break;
+			default:
+				SPIRV_CROSS_THROW("Invalid Y'CbCr range.");
+			}
+		}
+	}
+	else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
+	         !is_dynamic_img_sampler)
+	{
+		add_spv_func_and_recompile(SPVFuncImplTextureSwizzle);
+		expr += "spvTextureSwizzle(";
+	}
+
+	string inner_expr = CompilerGLSL::to_texture_op(i, forward, inherited_expressions);
+
+	if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
+	{
+		if (!constexpr_sampler->swizzle_is_identity())
+		{
+			static const char swizzle_names[] = "rgba";
+			if (!constexpr_sampler->swizzle_has_one_or_zero())
+			{
+				// If we can, do it inline.
+				expr += inner_expr + ".";
+				for (uint32_t c = 0; c < 4; c++)
+				{
+					switch (constexpr_sampler->swizzle[c])
+					{
+					case MSL_COMPONENT_SWIZZLE_IDENTITY:
+						expr += swizzle_names[c];
+						break;
+					case MSL_COMPONENT_SWIZZLE_R:
+					case MSL_COMPONENT_SWIZZLE_G:
+					case MSL_COMPONENT_SWIZZLE_B:
+					case MSL_COMPONENT_SWIZZLE_A:
+						expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
+						break;
+					default:
+						SPIRV_CROSS_THROW("Invalid component swizzle.");
+					}
+				}
+			}
+			else
+			{
+				// Otherwise, we need to emit a temporary and swizzle that.
+				uint32_t temp_id = ir.increase_bound_by(1);
+				emit_op(result_type_id, temp_id, inner_expr, false);
+				for (auto &inherit : inherited_expressions)
+					inherit_expression_dependencies(temp_id, inherit);
+				inherited_expressions.clear();
+				inherited_expressions.push_back(temp_id);
+
+				switch (op)
+				{
+				case OpImageSampleDrefImplicitLod:
+				case OpImageSampleImplicitLod:
+				case OpImageSampleProjImplicitLod:
+				case OpImageSampleProjDrefImplicitLod:
+					register_control_dependent_expression(temp_id);
+					break;
+
+				default:
+					break;
+				}
+				expr += type_to_glsl(result_type) + "(";
+				for (uint32_t c = 0; c < 4; c++)
+				{
+					switch (constexpr_sampler->swizzle[c])
+					{
+					case MSL_COMPONENT_SWIZZLE_IDENTITY:
+						expr += to_expression(temp_id) + "." + swizzle_names[c];
+						break;
+					case MSL_COMPONENT_SWIZZLE_ZERO:
+						expr += "0";
+						break;
+					case MSL_COMPONENT_SWIZZLE_ONE:
+						expr += "1";
+						break;
+					case MSL_COMPONENT_SWIZZLE_R:
+					case MSL_COMPONENT_SWIZZLE_G:
+					case MSL_COMPONENT_SWIZZLE_B:
+					case MSL_COMPONENT_SWIZZLE_A:
+						expr += to_expression(temp_id) + "." +
+						        swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
+						break;
+					default:
+						SPIRV_CROSS_THROW("Invalid component swizzle.");
+					}
+					if (c < 3)
+						expr += ", ";
+				}
+				expr += ")";
+			}
+		}
+		else
+			expr += inner_expr;
+		if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+		{
+			expr += join(", ", constexpr_sampler->bpc, ")");
+			if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
+				expr += ")";
+		}
+	}
+	else
+	{
+		expr += inner_expr;
+		if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
+		    !is_dynamic_img_sampler)
+		{
+			// Add the swizzle constant from the swizzle buffer.
+			expr += ", " + to_swizzle_expression(img) + ")";
+			used_swizzle_buffer = true;
+		}
+	}
+
+	return expr;
+}
+
+static string create_swizzle(MSLComponentSwizzle swizzle)
+{
+	switch (swizzle)
+	{
+	case MSL_COMPONENT_SWIZZLE_IDENTITY:
+		return "spvSwizzle::none";
+	case MSL_COMPONENT_SWIZZLE_ZERO:
+		return "spvSwizzle::zero";
+	case MSL_COMPONENT_SWIZZLE_ONE:
+		return "spvSwizzle::one";
+	case MSL_COMPONENT_SWIZZLE_R:
+		return "spvSwizzle::red";
+	case MSL_COMPONENT_SWIZZLE_G:
+		return "spvSwizzle::green";
+	case MSL_COMPONENT_SWIZZLE_B:
+		return "spvSwizzle::blue";
+	case MSL_COMPONENT_SWIZZLE_A:
+		return "spvSwizzle::alpha";
+	default:
+		SPIRV_CROSS_THROW("Invalid component swizzle.");
+		return "";
+	}
+}
+
 // Returns a string representation of the ID, usable as a function arg.
 // Manufacture automatic sampler arg for SampledImage texture.
-string CompilerMSL::to_func_call_arg(uint32_t id)
+string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
 {
 	string arg_str;
 
+	auto &type = expression_type(id);
+	bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
+	// If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around.
+	bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler);
+	if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler)
+		arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">(");
+
 	auto *c = maybe_get<SPIRConstant>(id);
 	if (c && !get<SPIRType>(c->constant_type).array.empty())
 	{
@@ -4833,7 +7178,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
 		// so just create a thread local copy in the current function.
 		arg_str = join("_", id, "_array_copy");
 		auto &constants = current_function->constant_arrays_needed_on_stack;
-		auto itr = find(begin(constants), end(constants), id);
+		auto itr = find(begin(constants), end(constants), ID(id));
 		if (itr == end(constants))
 		{
 			force_recompile();
@@ -4841,22 +7186,106 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
 		}
 	}
 	else
-		arg_str = CompilerGLSL::to_func_call_arg(id);
+		arg_str += CompilerGLSL::to_func_call_arg(arg, id);
 
-	// Manufacture automatic sampler arg if the arg is a SampledImage texture.
-	auto &type = expression_type(id);
-	if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
+	if (!arg_is_dynamic_img_sampler)
 	{
 		// Need to check the base variable in case we need to apply a qualified alias.
 		uint32_t var_id = 0;
-		auto *sampler_var = maybe_get<SPIRVariable>(id);
-		if (sampler_var)
-			var_id = sampler_var->basevariable;
+		auto *var = maybe_get<SPIRVariable>(id);
+		if (var)
+			var_id = var->basevariable;
 
-		arg_str += ", " + to_sampler_expression(var_id ? var_id : id);
+		auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id);
+		if (type.basetype == SPIRType::SampledImage)
+		{
+			// Manufacture automatic plane args for multiplanar texture
+			uint32_t planes = 1;
+			if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+			{
+				planes = constexpr_sampler->planes;
+				// If this parameter isn't aliasing a global, then we need to use
+				// the special "dynamic image-sampler" class to pass it--and we need
+				// to use it for *every* non-alias parameter, in case a combined
+				// image-sampler with a Y'CbCr conversion is passed. Hopefully, this
+				// pathological case is so rare that it should never be hit in practice.
+				if (!arg.alias_global_variable)
+					add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler);
+			}
+			for (uint32_t i = 1; i < planes; i++)
+				arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i);
+			// Manufacture automatic sampler arg if the arg is a SampledImage texture.
+			if (type.image.dim != DimBuffer)
+				arg_str += ", " + to_sampler_expression(var_id ? var_id : id);
+
+			// Add sampler Y'CbCr conversion info if we have it
+			if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+			{
+				SmallVector<string> samp_args;
+
+				switch (constexpr_sampler->resolution)
+				{
+				case MSL_FORMAT_RESOLUTION_444:
+					// Default
+					break;
+				case MSL_FORMAT_RESOLUTION_422:
+					samp_args.push_back("spvFormatResolution::_422");
+					break;
+				case MSL_FORMAT_RESOLUTION_420:
+					samp_args.push_back("spvFormatResolution::_420");
+					break;
+				default:
+					SPIRV_CROSS_THROW("Invalid format resolution.");
+				}
+
+				if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST)
+					samp_args.push_back("spvChromaFilter::linear");
+
+				if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
+					samp_args.push_back("spvXChromaLocation::midpoint");
+				if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
+					samp_args.push_back("spvYChromaLocation::midpoint");
+				switch (constexpr_sampler->ycbcr_model)
+				{
+				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
+					// Default
+					break;
+				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
+					samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity");
+					break;
+				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
+					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709");
+					break;
+				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
+					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601");
+					break;
+				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
+					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020");
+					break;
+				default:
+					SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
+				}
+				if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL)
+					samp_args.push_back("spvYCbCrRange::itu_narrow");
+				samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")"));
+				arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")");
+			}
+		}
+
+		if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+			arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(",
+			                create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(",
+			                create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(",
+			                create_swizzle(constexpr_sampler->swizzle[0]), ")");
+		else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
+			arg_str += ", " + to_swizzle_expression(var_id ? var_id : id);
+
+		if (buffers_requiring_array_length.count(var_id))
+			arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id);
+
+		if (is_dynamic_img_sampler)
+			arg_str += ")";
 	}
-	if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
-		arg_str += ", " + to_swizzle_expression(id);
 
 	return arg_str;
 }
@@ -4867,7 +7296,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
 string CompilerMSL::to_sampler_expression(uint32_t id)
 {
 	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
-	auto expr = to_expression(combined ? combined->image : id);
+	auto expr = to_expression(combined ? combined->image : VariableID(id));
 	auto index = expr.find_first_of('[');
 
 	uint32_t samp_id = 0;
@@ -4887,9 +7316,15 @@ string CompilerMSL::to_sampler_expression(uint32_t id)
 string CompilerMSL::to_swizzle_expression(uint32_t id)
 {
 	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
-	auto expr = to_expression(combined ? combined->image : id);
+
+	auto expr = to_expression(combined ? combined->image : VariableID(id));
 	auto index = expr.find_first_of('[');
 
+	// If an image is part of an argument buffer translate this to a legal identifier.
+	for (auto &c : expr)
+		if (c == '.')
+			c = '_';
+
 	if (index == string::npos)
 		return expr + swizzle_name_suffix;
 	else
@@ -4900,6 +7335,32 @@ string CompilerMSL::to_swizzle_expression(uint32_t id)
 	}
 }
 
+string CompilerMSL::to_buffer_size_expression(uint32_t id)
+{
+	auto expr = to_expression(id);
+	auto index = expr.find_first_of('[');
+
+	// This is quite crude, but we need to translate the reference name (*spvDescriptorSetN.name) to
+	// the pointer expression spvDescriptorSetN.name to make a reasonable expression here.
+	// This only happens if we have argument buffers and we are using OpArrayLength on a lone SSBO in that set.
+	if (expr.size() >= 3 && expr[0] == '(' && expr[1] == '*')
+		expr = address_of_expression(expr);
+
+	// If a buffer is part of an argument buffer translate this to a legal identifier.
+	for (auto &c : expr)
+		if (c == '.')
+			c = '_';
+
+	if (index == string::npos)
+		return expr + buffer_size_name_suffix;
+	else
+	{
+		auto buffer_expr = expr.substr(0, index);
+		auto array_expr = expr.substr(index);
+		return buffer_expr + buffer_size_name_suffix + array_expr;
+	}
+}
+
 // Checks whether the type is a Block all of whose members have DecorationPatch.
 bool CompilerMSL::is_patch_block(const SPIRType &type)
 {
@@ -4918,91 +7379,33 @@ bool CompilerMSL::is_patch_block(const SPIRType &type)
 // Checks whether the ID is a row_major matrix that requires conversion before use
 bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
 {
-	// Natively supported row-major matrices do not need to be converted.
-	if (backend.native_row_major_matrix)
-		return false;
-
-	// Non-matrix or column-major matrix types do not need to be converted.
-	if (!has_decoration(id, DecorationRowMajor))
-		return false;
-
-	// Generate a function that will swap matrix elements from row-major to column-major.
-	// Packed row-matrix should just use transpose() function.
-	if (!has_extended_decoration(id, SPIRVCrossDecorationPacked))
-	{
-		const auto type = expression_type(id);
-		add_convert_row_major_matrix_function(type.columns, type.vecsize);
-	}
-
-	return true;
+	auto *e = maybe_get<SPIRExpression>(id);
+	if (e)
+		return e->need_transpose;
+	else
+		return has_decoration(id, DecorationRowMajor);
 }
 
 // Checks whether the member is a row_major matrix that requires conversion before use
 bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
 {
-	// Natively supported row-major matrices do not need to be converted.
-	if (backend.native_row_major_matrix)
-		return false;
-
-	// Non-matrix or column-major matrix types do not need to be converted.
-	if (!has_member_decoration(type.self, index, DecorationRowMajor))
-		return false;
-
-	// Generate a function that will swap matrix elements from row-major to column-major.
-	// Packed row-matrix should just use transpose() function.
-	if (!has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked))
-	{
-		const auto mbr_type = get<SPIRType>(type.member_types[index]);
-		add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
-	}
-
-	return true;
+	return has_member_decoration(type.self, index, DecorationRowMajor);
 }
 
-// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix.
-void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows)
+string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
+                                             bool is_packed)
 {
-	SPVFuncImpl spv_func;
-	if (cols == rows) // Square matrix...just use transpose() function
-		return;
-	else if (cols == 2 && rows == 3)
-		spv_func = SPVFuncImplRowMajor2x3;
-	else if (cols == 2 && rows == 4)
-		spv_func = SPVFuncImplRowMajor2x4;
-	else if (cols == 3 && rows == 2)
-		spv_func = SPVFuncImplRowMajor3x2;
-	else if (cols == 3 && rows == 4)
-		spv_func = SPVFuncImplRowMajor3x4;
-	else if (cols == 4 && rows == 2)
-		spv_func = SPVFuncImplRowMajor4x2;
-	else if (cols == 4 && rows == 3)
-		spv_func = SPVFuncImplRowMajor4x3;
-	else
-		SPIRV_CROSS_THROW("Could not convert row-major matrix.");
-
-	auto rslt = spv_function_implementations.insert(spv_func);
-	if (rslt.second)
+	if (!is_matrix(exp_type))
 	{
-		suppress_missing_prototypes = true;
-		force_recompile();
+		return CompilerGLSL::convert_row_major_matrix(move(exp_str), exp_type, physical_type_id, is_packed);
 	}
-}
-
-// Wraps the expression string in a function call that converts the
-// row_major matrix result of the expression to a column_major matrix.
-string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed)
-{
-	strip_enclosed_expression(exp_str);
-
-	string func_name;
-
-	// Square and packed matrices can just use transpose
-	if (exp_type.columns == exp_type.vecsize || is_packed)
-		func_name = "transpose";
 	else
-		func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
-
-	return join(func_name, "(", exp_str, ")");
+	{
+		strip_enclosed_expression(exp_str);
+		if (physical_type_id != 0 || is_packed)
+			exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true);
+		return join("transpose(", exp_str, ")");
+	}
 }
 
 // Called automatically at the end of the entry point function
@@ -5025,55 +7428,66 @@ void CompilerMSL::emit_fixup()
 string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                      const string &qualifier)
 {
-	auto &membertype = get<SPIRType>(member_type_id);
-
-	// If this member requires padding to maintain alignment, emit a dummy padding member.
-	MSLStructMemberKey key = get_struct_member_key(type.self, index);
-	uint32_t pad_len = struct_member_padding[key];
-	if (pad_len > 0)
-		statement("char _m", index, "_pad", "[", to_string(pad_len), "];");
+	if (member_is_remapped_physical_type(type, index))
+		member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
+	auto &physical_type = get<SPIRType>(member_type_id);
 
 	// If this member is packed, mark it as so.
-	string pack_pfx = "";
-
-	const SPIRType *effective_membertype = &membertype;
-	SPIRType override_type;
+	string pack_pfx;
 
 	uint32_t orig_id = 0;
 	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))
 		orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID);
 
-	if (member_is_packed_type(type, index))
+	bool row_major = false;
+	if (is_matrix(physical_type))
+		row_major = has_member_decoration(type.self, index, DecorationRowMajor);
+
+	SPIRType row_major_physical_type;
+	const SPIRType *declared_type = &physical_type;
+
+	if (member_is_packed_physical_type(type, index))
 	{
 		// If we're packing a matrix, output an appropriate typedef
-		if (membertype.basetype == SPIRType::Struct)
+		if (physical_type.basetype == SPIRType::Struct)
 		{
-			pack_pfx = "/* FIXME: A padded struct is needed here. If you see this message, file a bug! */ ";
+			SPIRV_CROSS_THROW("Cannot emit a packed struct currently.");
 		}
-		else if (membertype.vecsize > 1 && membertype.columns > 1)
+		else if (is_matrix(physical_type))
 		{
+			uint32_t rows = physical_type.vecsize;
+			uint32_t cols = physical_type.columns;
 			pack_pfx = "packed_";
-			string base_type = membertype.width == 16 ? "half" : "float";
+			if (row_major)
+			{
+				// These are stored transposed.
+				rows = physical_type.columns;
+				cols = physical_type.vecsize;
+				pack_pfx = "packed_rm_";
+			}
+			string base_type = physical_type.width == 16 ? "half" : "float";
 			string td_line = "typedef ";
-			td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns);
+			td_line += "packed_" + base_type + to_string(rows);
 			td_line += " " + pack_pfx;
-			td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize);
+			// Use the actual matrix size here.
+			td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize);
+			td_line += "[" + to_string(cols) + "]";
 			td_line += ";";
 			add_typedef_line(td_line);
 		}
-		else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
-		{
-			// A "packed" float array, but we pad here instead to 4-vector.
-			override_type = membertype;
-			override_type.vecsize = 4;
-			effective_membertype = &override_type;
-		}
 		else
 			pack_pfx = "packed_";
 	}
+	else if (row_major)
+	{
+		// Need to declare type with flipped vecsize/columns.
+		row_major_physical_type = physical_type;
+		swap(row_major_physical_type.vecsize, row_major_physical_type.columns);
+		declared_type = &row_major_physical_type;
+	}
 
 	// Very specifically, image load-store in argument buffers are disallowed on MSL on iOS.
-	if (msl_options.is_ios() && membertype.basetype == SPIRType::Image && membertype.image.sampled == 2)
+	if (msl_options.is_ios() && physical_type.basetype == SPIRType::Image && physical_type.image.sampled == 2)
 	{
 		if (!has_decoration(orig_id, DecorationNonWritable))
 			SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS.");
@@ -5081,13 +7495,13 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
 
 	// Array information is baked into these types.
 	string array_type;
-	if (membertype.basetype != SPIRType::Image && membertype.basetype != SPIRType::Sampler &&
-	    membertype.basetype != SPIRType::SampledImage)
+	if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler &&
+	    physical_type.basetype != SPIRType::SampledImage)
 	{
-		array_type = type_to_array_glsl(membertype);
+		array_type = type_to_array_glsl(physical_type);
 	}
 
-	return join(pack_pfx, type_to_glsl(*effective_membertype, orig_id), " ", qualifier, to_member_name(type, index),
+	return join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index),
 	            member_attribute_qualifier(type, index), array_type, ";");
 }
 
@@ -5095,9 +7509,26 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
 void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                      const string &qualifier, uint32_t)
 {
+	// If this member requires padding to maintain its declared offset, emit a dummy padding member before it.
+	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget))
+	{
+		uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget);
+		statement("char _m", index, "_pad", "[", pad_len, "];");
+	}
+
 	statement(to_struct_member(type, member_type_id, index, qualifier));
 }
 
+void CompilerMSL::emit_struct_padding_target(const SPIRType &type)
+{
+	uint32_t struct_size = get_declared_struct_size_msl(type, true, true);
+	uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget);
+	if (target_size < struct_size)
+		SPIRV_CROSS_THROW("Cannot pad with negative bytes.");
+	else if (target_size > struct_size)
+		statement("char _m0_final_padding[", target_size - struct_size, "];");
+}
+
 // Return a MSL qualifier for the specified function attribute member
 string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index)
 {
@@ -5109,9 +7540,16 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 	BuiltIn builtin = BuiltInMax;
 	bool is_builtin = is_member_builtin(type, index, &builtin);
 
-	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID))
-		return join(" [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID),
-		            ")]]");
+	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
+	{
+		string quals = join(
+		    " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
+		if (interlocked_resources.count(
+		        get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
+			quals += ", raster_order_group(0)";
+		quals += "]]";
+		return quals;
+	}
 
 	// Vertex function inputs
 	if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
@@ -5188,6 +7626,8 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 			{
 			case BuiltInInvocationId:
 			case BuiltInPrimitiveId:
+			case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
+			case BuiltInSubgroupSize: // FIXME: Should work in any stage
 				return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
 			case BuiltInPatchVertices:
 				return "";
@@ -5239,18 +7679,25 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 	// Fragment function inputs
 	if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput)
 	{
-		string quals = "";
+		string quals;
 		if (is_builtin)
 		{
 			switch (builtin)
 			{
+			case BuiltInViewIndex:
+				if (!msl_options.multiview)
+					break;
+				/* fallthrough */
 			case BuiltInFrontFacing:
 			case BuiltInPointCoord:
 			case BuiltInFragCoord:
 			case BuiltInSampleId:
 			case BuiltInSampleMask:
 			case BuiltInLayer:
+			case BuiltInBaryCoordNV:
+			case BuiltInBaryCoordNoPerspNV:
 				quals = builtin_qualifier(builtin);
+				break;
 
 			default:
 				break;
@@ -5268,6 +7715,20 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 					quals = string("user(locn") + convert_to_string(locn) + ")";
 			}
 		}
+
+		if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV)
+		{
+			if (has_member_decoration(type.self, index, DecorationFlat) ||
+			    has_member_decoration(type.self, index, DecorationCentroid) ||
+			    has_member_decoration(type.self, index, DecorationSample) ||
+			    has_member_decoration(type.self, index, DecorationNoPerspective))
+			{
+				// NoPerspective is baked into the builtin type.
+				SPIRV_CROSS_THROW(
+				    "Flat, Centroid, Sample, NoPerspective decorations are not supported for BaryCoord inputs.");
+			}
+		}
+
 		// Don't bother decorating integers with the 'flat' attribute; it's
 		// the default (in fact, the only option). Also don't bother with the
 		// FragCoord builtin; it's always noperspective on Metal.
@@ -5304,6 +7765,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 				quals += "center_no_perspective";
 			}
 		}
+
 		if (!quals.empty())
 			return " [[" + quals + "]]";
 	}
@@ -5315,6 +7777,11 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 		{
 			switch (builtin)
 			{
+			case BuiltInFragStencilRefEXT:
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up.");
+				return string(" [[") + builtin_qualifier(builtin) + "]]";
+
 			case BuiltInSampleMask:
 			case BuiltInFragDepth:
 				return string(" [[") + builtin_qualifier(builtin) + "]]";
@@ -5347,6 +7814,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 			case BuiltInNumWorkgroups:
 			case BuiltInLocalInvocationId:
 			case BuiltInLocalInvocationIndex:
+			case BuiltInNumSubgroups:
+			case BuiltInSubgroupId:
+			case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
+			case BuiltInSubgroupSize: // FIXME: Should work in any stage
 				return string(" [[") + builtin_qualifier(builtin) + "]]";
 
 			default:
@@ -5417,8 +7888,10 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 			                  execution.output_vertices, ") ]] vertex");
 		break;
 	case ExecutionModelFragment:
-		entry_type =
-		    execution.flags.get(ExecutionModeEarlyFragmentTests) ? "[[ early_fragment_tests ]] fragment" : "fragment";
+		entry_type = execution.flags.get(ExecutionModeEarlyFragmentTests) ||
+		                     execution.flags.get(ExecutionModePostDepthCoverage) ?
+		                 "[[ early_fragment_tests ]] fragment" :
+		                 "fragment";
 		break;
 	case ExecutionModelTessellationControl:
 		if (!msl_options.supports_msl_version(1, 2))
@@ -5442,21 +7915,37 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 {
 	const auto &type = get<SPIRType>(argument.basetype);
+	return get_type_address_space(type, argument.self, true);
+}
 
+string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument)
+{
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	auto *var = maybe_get<SPIRVariable>(id);
+	if (var && type.basetype == SPIRType::Struct &&
+	    (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+		flags = get_buffer_block_flags(id);
+	else
+		flags = get_decoration_bitset(id);
+
+	const char *addr_space = nullptr;
 	switch (type.storage)
 	{
 	case StorageClassWorkgroup:
-		return "threadgroup";
+		addr_space = "threadgroup";
+		break;
 
 	case StorageClassStorageBuffer:
 	{
 		// For arguments from variable pointers, we use the write count deduction, so
 		// we should not assume any constness here. Only for global SSBOs.
 		bool readonly = false;
-		if (has_decoration(type.self, DecorationBlock))
-			readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
+		if (!var || has_decoration(type.self, DecorationBlock))
+			readonly = flags.get(DecorationNonWritable);
 
-		return readonly ? "const device" : "device";
+		addr_space = readonly ? "const device" : "device";
+		break;
 	}
 
 	case StorageClassUniform:
@@ -5466,93 +7955,58 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 		{
 			bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 			if (ssbo)
-			{
-				bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
-				return readonly ? "const device" : "device";
-			}
+				addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
 			else
-				return "constant";
+				addr_space = "constant";
 		}
+		else if (!argument)
+			addr_space = "constant";
 		break;
 
 	case StorageClassFunction:
 	case StorageClassGeneric:
-		// No address space for plain values.
-		return type.pointer ? "thread" : "";
+		break;
 
 	case StorageClassInput:
-		if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id)
-			return "threadgroup";
+		if (get_execution_model() == ExecutionModelTessellationControl && var &&
+		    var->basevariable == stage_in_ptr_var_id)
+			addr_space = "threadgroup";
 		break;
 
 	case StorageClassOutput:
 		if (capture_output_to_buffer)
-			return "device";
+			addr_space = "device";
 		break;
 
 	default:
 		break;
 	}
 
-	return "thread";
+	if (!addr_space)
+		// No address space for plain values.
+		addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : "";
+
+	return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
 }
 
-string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
+const char *CompilerMSL::to_restrict(uint32_t id, bool space)
 {
-	switch (type.storage)
-	{
-	case StorageClassWorkgroup:
-		return "threadgroup";
-
-	case StorageClassStorageBuffer:
-	{
-		// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-		Bitset flags;
-		if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	if (ir.ids[id].get_type() == TypeVariable)
+	{
+		uint32_t type_id = expression_type_id(id);
+		auto &type = expression_type(id);
+		if (type.basetype == SPIRType::Struct &&
+		    (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
 			flags = get_buffer_block_flags(id);
 		else
 			flags = get_decoration_bitset(id);
-
-		return flags.get(DecorationNonWritable) ? "const device" : "device";
-	}
-
-	case StorageClassUniform:
-	case StorageClassUniformConstant:
-	case StorageClassPushConstant:
-		if (type.basetype == SPIRType::Struct)
-		{
-			bool ssbo = has_decoration(type.self, DecorationBufferBlock);
-			if (ssbo)
-			{
-				// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-				Bitset flags;
-				if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-					flags = get_buffer_block_flags(id);
-				else
-					flags = get_decoration_bitset(id);
-
-				return flags.get(DecorationNonWritable) ? "const device" : "device";
-			}
-			else
-				return "constant";
-		}
-		break;
-
-	case StorageClassFunction:
-	case StorageClassGeneric:
-		// No address space for plain values.
-		return type.pointer ? "thread" : "";
-
-	case StorageClassOutput:
-		if (capture_output_to_buffer)
-			return "device";
-		break;
-
-	default:
-		break;
 	}
+	else
+		flags = get_decoration_bitset(id);
 
-	return "thread";
+	return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
 }
 
 string CompilerMSL::entry_point_arg_stage_in()
@@ -5581,8 +8035,9 @@ string CompilerMSL::entry_point_arg_stage_in()
 void CompilerMSL::entry_point_args_builtin(string &ep_args)
 {
 	// Builtin variables
+	SmallVector<pair<SPIRVariable *, BuiltIn>, 8> active_builtins;
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
-		BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type;
+		auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
 
 		// Don't emit SamplePosition as a separate parameter. In the entry
 		// point, we get that by calling get_sample_position() on the sample ID.
@@ -5590,20 +8045,66 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
 		    get_variable_data_type(var).basetype != SPIRType::Struct &&
 		    get_variable_data_type(var).basetype != SPIRType::ControlPointArray)
 		{
+			// If the builtin is not part of the active input builtin set, don't emit it.
+			// Relevant for multiple entry-point modules which might declare unused builtins.
+			if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id))
+				return;
+
+			// Remember this variable. We may need to correct its type.
+			active_builtins.push_back(make_pair(&var, bi_type));
+
+			// These builtins are emitted specially. If we pass this branch, the builtin directly matches
+			// a MSL builtin.
 			if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation &&
 			    bi_type != BuiltInPatchVertices && bi_type != BuiltInTessLevelInner &&
 			    bi_type != BuiltInTessLevelOuter && bi_type != BuiltInPosition && bi_type != BuiltInPointSize &&
-			    bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance)
+			    bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask &&
+			    bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV &&
+			    bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask &&
+			    bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask && bi_type != BuiltInDeviceIndex &&
+			    ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) ||
+			     bi_type != BuiltInViewIndex) &&
+			    (get_execution_model() == ExecutionModelGLCompute ||
+			     (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) ||
+			     (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize)))
 			{
 				if (!ep_args.empty())
 					ep_args += ", ";
 
-				ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id);
-				ep_args += " [[" + builtin_qualifier(bi_type) + "]]";
+				ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id);
+				ep_args += " [[" + builtin_qualifier(bi_type);
+				if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage))
+				{
+					if (!msl_options.supports_msl_version(2))
+						SPIRV_CROSS_THROW("Post-depth coverage requires Metal 2.0.");
+					if (!msl_options.is_ios())
+						SPIRV_CROSS_THROW("Post-depth coverage is only supported on iOS.");
+					ep_args += ", post_depth_coverage";
+				}
+				ep_args += "]]";
 			}
 		}
+
+		if (var.storage == StorageClassInput &&
+		    has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase))
+		{
+			// This is a special implicit builtin, not corresponding to any SPIR-V builtin,
+			// which holds the base that was passed to vkCmdDispatchBase(). If it's present,
+			// assume we emitted it for a good reason.
+			assert(msl_options.supports_msl_version(1, 2));
+			if (!ep_args.empty())
+				ep_args += ", ";
+
+			ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]";
+		}
 	});
 
+	// Correct the types of all encountered active builtins. We couldn't do this before
+	// because ensure_correct_builtin_type() may increase the bound, which isn't allowed
+	// while iterating over IDs.
+	for (auto &var : active_builtins)
+		var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second);
+
 	// Vertex and instance index built-ins
 	if (needs_vertex_idx_arg)
 		ep_args += built_in_func_arg(BuiltInVertexIndex, !ep_args.empty());
@@ -5670,6 +8171,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
 string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 {
 	string ep_args = entry_point_arg_stage_in();
+	Bitset claimed_bindings;
 
 	for (uint32_t i = 0; i < kMaxArgumentBuffers; i++)
 	{
@@ -5684,12 +8186,30 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 		if (!ep_args.empty())
 			ep_args += ", ";
 
-		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
-		ep_args += " [[buffer(" + convert_to_string(i) + ")]]";
+		// Check if the argument buffer binding itself has been remapped.
+		uint32_t buffer_binding;
+		auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding });
+		if (itr != end(resource_bindings))
+		{
+			buffer_binding = itr->second.first.msl_buffer;
+			itr->second.second = true;
+		}
+		else
+		{
+			// As a fallback, directly map desc set <-> binding.
+			// If that was taken, take the next buffer binding.
+			if (claimed_bindings.get(i))
+				buffer_binding = next_metal_resource_index_buffer;
+			else
+				buffer_binding = i;
+		}
+
+		claimed_bindings.set(buffer_binding);
+
+		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id);
+		ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
 
-		// Makes it more practical for testing, since the push constant block can occupy the first available
-		// buffer slot if it's not bound explicitly.
-		next_metal_resource_index_buffer = i + 1;
+		next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
 	}
 
 	entry_point_args_discrete_descriptors(ep_args);
@@ -5701,6 +8221,28 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 	return ep_args;
 }
 
+const MSLConstexprSampler *CompilerMSL::find_constexpr_sampler(uint32_t id) const
+{
+	// Try by ID.
+	{
+		auto itr = constexpr_samplers_by_id.find(id);
+		if (itr != end(constexpr_samplers_by_id))
+			return &itr->second;
+	}
+
+	// Try by binding.
+	{
+		uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
+		uint32_t binding = get_decoration(id, DecorationBinding);
+
+		auto itr = constexpr_samplers_by_binding.find({ desc_set, binding });
+		if (itr != end(constexpr_samplers_by_binding))
+			return &itr->second;
+	}
+
+	return nullptr;
+}
+
 void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 {
 	// Output resources, sorted by resource index & type
@@ -5712,43 +8254,65 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 		string name;
 		SPIRType::BaseType basetype;
 		uint32_t index;
+		uint32_t plane;
 	};
 
 	SmallVector<Resource> resources;
 
-	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
+	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
 		if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
 		     var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) &&
 		    !is_hidden_variable(var))
 		{
 			auto &type = get_variable_data_type(var);
-			uint32_t var_id = var.self;
 
-			if (var.storage != StorageClassPushConstant)
+			// Very specifically, image load-store in argument buffers are disallowed on MSL on iOS.
+			// But we won't know when the argument buffer is encoded whether this image will have
+			// a NonWritable decoration. So just use discrete arguments for all storage images
+			// on iOS.
+			if (!(msl_options.is_ios() && type.basetype == SPIRType::Image && type.image.sampled == 2) &&
+			    var.storage != StorageClassPushConstant)
 			{
 				uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
 				if (descriptor_set_is_argument_buffer(desc_set))
 					return;
 			}
 
+			const MSLConstexprSampler *constexpr_sampler = nullptr;
+			if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
+			{
+				constexpr_sampler = find_constexpr_sampler(var_id);
+				if (constexpr_sampler)
+				{
+					// Mark this ID as a constexpr sampler for later in case it came from set/bindings.
+					constexpr_samplers_by_id[var_id] = *constexpr_sampler;
+				}
+			}
+
 			if (type.basetype == SPIRType::SampledImage)
 			{
 				add_resource_name(var_id);
-				resources.push_back(
-				    { &var, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) });
 
-				if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0)
+				uint32_t plane_count = 1;
+				if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+					plane_count = constexpr_sampler->planes;
+
+				for (uint32_t i = 0; i < plane_count; i++)
+					resources.push_back({ &var, to_name(var_id), SPIRType::Image,
+					                      get_metal_resource_index(var, SPIRType::Image, i), i });
+
+				if (type.image.dim != DimBuffer && !constexpr_sampler)
 				{
 					resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler,
-					                      get_metal_resource_index(var, SPIRType::Sampler) });
+					                      get_metal_resource_index(var, SPIRType::Sampler), 0 });
 				}
 			}
-			else if (constexpr_samplers.count(var_id) == 0)
+			else if (!constexpr_sampler)
 			{
 				// constexpr samplers are not declared as resources.
 				add_resource_name(var_id);
 				resources.push_back(
-				    { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) });
+				    { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 });
 			}
 		}
 	});
@@ -5789,17 +8353,24 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 				{
 					if (!ep_args.empty())
 						ep_args += ", ";
-					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" +
-					           convert_to_string(i);
-					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
+					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
+					           r.name + "_" + convert_to_string(i);
+					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
+					if (interlocked_resources.count(var_id))
+						ep_args += ", raster_order_group(0)";
+					ep_args += "]]";
 				}
 			}
 			else
 			{
 				if (!ep_args.empty())
 					ep_args += ", ";
-				ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name;
-				ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+				ep_args +=
+				    get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
+				ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+				if (interlocked_resources.count(var_id))
+					ep_args += ", raster_order_group(0)";
+				ep_args += "]]";
 			}
 			break;
 		}
@@ -5813,10 +8384,25 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 			if (!ep_args.empty())
 				ep_args += ", ";
 			ep_args += image_type_glsl(type, var_id) + " " + r.name;
-			ep_args += " [[texture(" + convert_to_string(r.index) + ")]]";
+			if (r.plane > 0)
+				ep_args += join(plane_name_suffix, r.plane);
+			ep_args += " [[texture(" + convert_to_string(r.index) + ")";
+			if (interlocked_resources.count(var_id))
+				ep_args += ", raster_order_group(0)";
+			ep_args += "]]";
 			break;
 		default:
-			SPIRV_CROSS_THROW("Unexpected resource type");
+			if (!ep_args.empty())
+				ep_args += ", ";
+			if (!type.pointer)
+				ep_args += get_type_address_space(get<SPIRType>(var.basetype), var_id) + " " +
+				           type_to_glsl(type, var_id) + "& " + r.name;
+			else
+				ep_args += type_to_glsl(type, var_id) + " " + r.name;
+			ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+			if (interlocked_resources.count(var_id))
+				ep_args += ", raster_order_group(0)";
+			ep_args += "]]";
 			break;
 		}
 	}
@@ -5838,28 +8424,62 @@ string CompilerMSL::entry_point_args_classic(bool append_comma)
 
 void CompilerMSL::fix_up_shader_inputs_outputs()
 {
-	// Look for sampled images. Add hooks to set up the swizzle constants.
+	// Look for sampled images and buffer. Add hooks to set up the swizzle constants or array lengths.
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
 		auto &type = get_variable_data_type(var);
-
 		uint32_t var_id = var.self;
+		bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 
-		if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
-		     var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) &&
-		    !is_hidden_variable(var))
+		if (var.storage == StorageClassUniformConstant && !is_hidden_variable(var))
 		{
 			if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
 			{
 				auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
 				entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
-					auto &aux_type = expression_type(aux_buffer_id);
 					bool is_array_type = !type.array.empty();
 
-					// If we have an array of images, we need to be able to index into it, so take a pointer instead.
-					statement("constant uint32_t", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
-					          is_array_type ? " = &" : " = ", to_name(aux_buffer_id), ".",
-					          to_member_name(aux_type, k_aux_mbr_idx_swizzle_const), "[",
-					          convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
+					uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
+					if (descriptor_set_is_argument_buffer(desc_set))
+					{
+						statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
+						          is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
+						          ".spvSwizzleConstants", "[",
+						          convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
+					}
+					else
+					{
+						// If we have an array of images, we need to be able to index into it, so take a pointer instead.
+						statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
+						          is_array_type ? " = &" : " = ", to_name(swizzle_buffer_id), "[",
+						          convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
+					}
+				});
+			}
+		}
+		else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) &&
+		         !is_hidden_variable(var))
+		{
+			if (buffers_requiring_array_length.count(var.self))
+			{
+				auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
+				entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
+					bool is_array_type = !type.array.empty();
+
+					uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
+					if (descriptor_set_is_argument_buffer(desc_set))
+					{
+						statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
+						          is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
+						          ".spvBufferSizeConstants", "[",
+						          convert_to_string(get_metal_resource_index(var, type.basetype)), "];");
+					}
+					else
+					{
+						// If we have an array of buffers, we need to be able to index into it, so take a pointer instead.
+						statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
+						          is_array_type ? " = &" : " = ", to_name(buffer_size_buffer_id), "[",
+						          convert_to_string(get_metal_resource_index(var, type.basetype)), "];");
+					}
 				});
 			}
 		}
@@ -5911,6 +8531,225 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
 				}
 				break;
+			case BuiltInSubgroupLocalInvocationId:
+				// This is natively supported in compute shaders.
+				if (get_execution_model() == ExecutionModelGLCompute)
+					break;
+
+				// This is natively supported in fragment shaders in MSL 2.2.
+				if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+					break;
+
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW(
+					    "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS.");
+
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW(
+					    "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1.");
+
+				// Shaders other than compute shaders don't support the SIMD-group
+				// builtins directly, but we can emulate them using the SIMD-group
+				// functions. This might break if some of the subgroup terminated
+				// before reaching the entry point.
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+					          " = simd_prefix_exclusive_sum(1);");
+				});
+				break;
+			case BuiltInSubgroupSize:
+				// This is natively supported in compute shaders.
+				if (get_execution_model() == ExecutionModelGLCompute)
+					break;
+
+				// This is natively supported in fragment shaders in MSL 2.2.
+				if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+					break;
+
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS.");
+
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1.");
+
+				entry_func.fixup_hooks_in.push_back(
+				    [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); });
+				break;
+			case BuiltInSubgroupEqMask: // Emulated: sets only this invocation's bit; IDs >= 32 go in the second word.
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+					          to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
+					          to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
+					          to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
+				});
+				break;
+			case BuiltInSubgroupGeMask:
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+				entry_func.fixup_hooks_in.push_back([=]() {
+					// Case where index < 32, size < 32:
+					// mask0 = bfe(0xFFFFFFFF, index, size - index);
+					// mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0
+					// Case where index < 32 but size >= 32:
+					// mask0 = bfe(0xFFFFFFFF, index, 32 - index);
+					// mask1 = bfe(0xFFFFFFFF, 0, size - 32);
+					// Case where index >= 32:
+					// mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0
+					// mask1 = bfe(0xFFFFFFFF, index - 32, size - index);
+					// This is expressed without branches to avoid divergent
+					// control flow--hence the complicated min/max expressions.
+					// This is further complicated by the fact that if you attempt
+					// to bfe out-of-bounds on Metal, undefined behavior is the
+					// result.
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+					          " = uint4(extract_bits(0xFFFFFFFF, min(",
+					          to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
+					          to_expression(builtin_subgroup_size_id), ", 32) - (int)",
+					          to_expression(builtin_subgroup_invocation_id_id),
+					          ", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
+					          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
+					          to_expression(builtin_subgroup_size_id), " - (int)max(",
+					          to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
+				});
+				break;
+			case BuiltInSubgroupGtMask:
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+				entry_func.fixup_hooks_in.push_back([=]() {
+					// The same logic applies here, except now the index is one
+					// more than the subgroup invocation ID.
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+					          " = uint4(extract_bits(0xFFFFFFFF, min(",
+					          to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
+					          to_expression(builtin_subgroup_size_id), ", 32) - (int)",
+					          to_expression(builtin_subgroup_invocation_id_id),
+					          " - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
+					          to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
+					          to_expression(builtin_subgroup_size_id), " - (int)max(",
+					          to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
+				});
+				break;
+			case BuiltInSubgroupLeMask:
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+					          " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
+					          to_expression(builtin_subgroup_invocation_id_id),
+					          " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
+					          to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));");
+				});
+				break;
+			case BuiltInSubgroupLtMask:
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+					          " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
+					          to_expression(builtin_subgroup_invocation_id_id),
+					          ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
+					          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
+				});
+				break;
+			case BuiltInViewIndex:
+				if (!msl_options.multiview)
+				{
+					// According to the Vulkan spec, when not running under a multiview
+					// render pass, ViewIndex is 0.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;");
+					});
+				}
+				else if (msl_options.view_index_from_device_index)
+				{
+					// In this case, we take the view index from that of the device we're running on.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+						          msl_options.device_index, ";");
+					});
+					// We actually don't want to set the render_target_array_index here.
+					// Since every physical device is rendering a different view,
+					// there's no need for layered rendering here.
+				}
+				else if (get_execution_model() == ExecutionModelFragment)
+				{
+					// Because we adjusted the view index in the vertex shader, we have to
+					// adjust it back here.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];");
+					});
+				}
+				else if (get_execution_model() == ExecutionModelVertex)
+				{
+					// Metal provides no special support for multiview, so we smuggle
+					// the view index in the instance index.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+						          to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id),
+						          " % ", to_expression(view_mask_buffer_id), "[1];");
+						statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id),
+						          "[1];");
+					});
+					// In addition to setting the variable itself, we also need to
+					// set the render_target_array_index with it on output. We have to
+					// offset this by the base view index, because Metal isn't in on
+					// our little game here.
+					entry_func.fixup_hooks_out.push_back([=]() {
+						statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ",
+						          to_expression(view_mask_buffer_id), "[0];");
+					});
+				}
+				break;
+			case BuiltInDeviceIndex:
+				// Metal pipelines belong to the devices which create them, so we'll
+				// need to create a MTLPipelineState for every MTLDevice in a grouped
+				// VkDevice. We can assume, then, that the device index is constant.
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+					          msl_options.device_index, ";");
+				});
+				break;
+			case BuiltInWorkgroupId:
+				if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId))
+					break;
+
+				// The vkCmdDispatchBase() command lets the client set the base value
+				// of WorkgroupId. Metal has no direct equivalent; we must make this
+				// adjustment ourselves.
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";");
+				});
+				break;
+			case BuiltInGlobalInvocationId:
+				if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId))
+					break;
+
+				// GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize.
+				// This needs to be adjusted too.
+				entry_func.fixup_hooks_in.push_back([=]() {
+					auto &execution = this->get_entry_point();
+					uint32_t workgroup_size_id = execution.workgroup_size.constant;
+					if (workgroup_size_id)
+						statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
+						          " * ", to_expression(workgroup_size_id), ";");
+					else
+						statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
+						          " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ",
+						          execution.workgroup_size.z, ");");
+				});
+				break;
 			default:
 				break;
 			}
@@ -5919,65 +8758,96 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 }
 
 // Returns the Metal index of the resource of the specified type as used by the specified variable.
-uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype)
+uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane)
 {
 	auto &execution = get_entry_point();
 	auto &var_dec = ir.meta[var.self].decoration;
+	auto &var_type = get<SPIRType>(var.basetype);
 	uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set;
 	uint32_t var_binding = (var.storage == StorageClassPushConstant) ? kPushConstBinding : var_dec.binding;
 
-	// If a matching binding has been specified, find and use it
-	auto itr = find_if(begin(resource_bindings), end(resource_bindings),
-	                   [&](const pair<MSLResourceBinding, bool> &resource) -> bool {
-		                   return var_desc_set == resource.first.desc_set && var_binding == resource.first.binding &&
-		                          execution.model == resource.first.stage;
-	                   });
+	// If a matching binding has been specified, find and use it.
+	auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding });
+
+	auto resource_decoration = var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler ?
+	                               SPIRVCrossDecorationResourceIndexSecondary :
+	                               SPIRVCrossDecorationResourceIndexPrimary;
+	if (plane == 1) // Planes 1 and 2 each record their index under a dedicated decoration.
+		resource_decoration = SPIRVCrossDecorationResourceIndexTertiary;
+	if (plane == 2)
+		resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary;
 
 	if (itr != end(resource_bindings))
 	{
-		itr->second = true;
+		auto &remap = itr->second;
+		remap.second = true;
 		switch (basetype)
 		{
-		case SPIRType::Struct:
-			return itr->first.msl_buffer;
 		case SPIRType::Image:
-			return itr->first.msl_texture;
+			set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane);
+			return remap.first.msl_texture + plane; // Extra planes are offset from the remapped texture slot.
 		case SPIRType::Sampler:
-			return itr->first.msl_sampler;
+			set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler);
+			return remap.first.msl_sampler;
 		default:
-			return 0;
+			set_extended_decoration(var.self, resource_decoration, remap.first.msl_buffer);
+			return remap.first.msl_buffer; // Every other base type uses the remapped buffer slot.
 		}
 	}
 
-	// If there is no explicit mapping of bindings to MSL, use the declared binding.
-	if (has_decoration(var.self, DecorationBinding))
-		return get_decoration(var.self, DecorationBinding);
+	// If we have already allocated an index, keep using it.
+	if (has_extended_decoration(var.self, resource_decoration))
+		return get_extended_decoration(var.self, resource_decoration);
+
+	// If we did not explicitly remap, allocate bindings on demand.
+	// We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different.
 
 	uint32_t binding_stride = 1;
 	auto &type = get<SPIRType>(var.basetype);
 	for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
-		binding_stride *= type.array_size_literal[i] ? type.array[i] : get<SPIRConstant>(type.array[i]).scalar();
+		binding_stride *= to_array_size_literal(type, i);
+
+	assert(binding_stride != 0); // A zero stride would not advance the allocation counters below.
 
-	// If a binding has not been specified, revert to incrementing resource indices
+	// If a binding has not been specified, revert to incrementing resource indices.
 	uint32_t resource_index;
-	switch (basetype)
+
+	bool allocate_argument_buffer_ids = false;
+	uint32_t desc_set = 0;
+
+	if (var.storage != StorageClassPushConstant)
 	{
-	case SPIRType::Struct:
-		resource_index = next_metal_resource_index_buffer;
-		next_metal_resource_index_buffer += binding_stride;
-		break;
-	case SPIRType::Image:
-		resource_index = next_metal_resource_index_texture;
-		next_metal_resource_index_texture += binding_stride;
-		break;
-	case SPIRType::Sampler:
-		resource_index = next_metal_resource_index_sampler;
-		next_metal_resource_index_sampler += binding_stride;
-		break;
-	default:
-		resource_index = 0;
-		break;
+		desc_set = get_decoration(var.self, DecorationDescriptorSet);
+		allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(desc_set);
+	}
+
+	if (allocate_argument_buffer_ids)
+	{
+		// Allocate from a flat ID binding space.
+		resource_index = next_metal_resource_ids[desc_set];
+		next_metal_resource_ids[desc_set] += binding_stride;
+	}
+	else
+	{
+		// Allocate from plain bindings which are allocated per resource type.
+		switch (basetype)
+		{
+		case SPIRType::Image:
+			resource_index = next_metal_resource_index_texture;
+			next_metal_resource_index_texture += binding_stride;
+			break;
+		case SPIRType::Sampler:
+			resource_index = next_metal_resource_index_sampler;
+			next_metal_resource_index_sampler += binding_stride;
+			break;
+		default: // Anything that is not an image or sampler consumes a buffer slot.
+			resource_index = next_metal_resource_index_buffer;
+			next_metal_resource_index_buffer += binding_stride;
+			break;
+		}
 	}
+
+	set_extended_decoration(var.self, resource_decoration, resource_index); // Cached so later queries return the same index.
 	return resource_index;
 }
 
@@ -6008,13 +8878,28 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 	if (constref)
 		decl += "const ";
 
+	// If this is a combined image-sampler for a 2D image with floating-point type,
+	// we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter
+	// for a global, then we need to emit a "dynamic" combined image-sampler.
+	// Unfortunately, this is necessary to properly support passing around
+	// combined image-samplers with Y'CbCr conversions on them.
+	bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage &&
+	                              type.image.dim == Dim2D && type_is_floating_point(get<SPIRType>(type.image.type)) &&
+	                              spv_function_implementations.count(SPVFuncImplDynamicImageSampler);
+
 	bool builtin = is_builtin_variable(var);
-	if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)
+	if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
 		decl += type_to_glsl(type, arg.id);
 	else if (builtin)
-		decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)));
+		decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)), arg.id);
 	else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type))
 		decl += join(type_to_glsl(type, arg.id), "*");
+	else if (is_dynamic_img_sampler)
+	{
+		decl += join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">");
+		// Mark the variable so that we can handle passing it to another function.
+		set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
+	}
 	else
 		decl += type_to_glsl(type, arg.id);
 
@@ -6037,6 +8922,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 			// non-constant arrays, but we can create thread const from constant.
 			decl = string("thread const ") + decl;
 			decl += " (&";
+			const char *restrict_kw = to_restrict(name_id);
+			if (*restrict_kw)
+			{
+				decl += " ";
+				decl += restrict_kw;
+			}
 			decl += to_expression(name_id);
 			decl += ")";
 			decl += type_to_array_glsl(type);
@@ -6057,20 +8948,36 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 
 		if (msl_options.argument_buffers)
 		{
-			// An awkward case where we need to emit *more* address space declarations (yay!).
-			// An example is where we pass down an array of buffer pointers to leaf functions.
-			// It's a constant array containing pointers to constants.
-			// The pointer array is always constant however. E.g.
-			// device SSBO * constant (&array)[N].
-			// const device SSBO * constant (&array)[N].
-			// constant SSBO * constant (&array)[N].
-			// However, this only matters for argument buffers, since for MSL 1.0 style codegen,
-			// we emit the buffer array on stack instead, and that seems to work just fine apparently.
-			if (storage == StorageClassUniform || storage == StorageClassStorageBuffer)
-				decl += " constant";
+			uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet);
+			if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) &&
+			    descriptor_set_is_argument_buffer(desc_set))
+			{
+				// An awkward case where we need to emit *more* address space declarations (yay!).
+				// An example is where we pass down an array of buffer pointers to leaf functions.
+				// It's a constant array containing pointers to constants.
+				// The pointer array is always constant however. E.g.
+				// device SSBO * constant (&array)[N].
+				// const device SSBO * constant (&array)[N].
+				// constant SSBO * constant (&array)[N].
+				// However, this only matters for argument buffers, since for MSL 1.0 style codegen,
+				// we emit the buffer array on stack instead, and that seems to work just fine apparently.
+
+				// If the argument was marked as being in device address space, any pointer to member would
+				// be const device, not constant.
+				if (argument_buffer_device_storage_mask & (1u << desc_set))
+					decl += " const device";
+				else
+					decl += " constant";
+			}
 		}
 
 		decl += " (&";
+		const char *restrict_kw = to_restrict(name_id);
+		if (*restrict_kw)
+		{
+			decl += " ";
+			decl += restrict_kw;
+		}
 		decl += to_expression(name_id);
 		decl += ")";
 		decl += type_to_array_glsl(type);
@@ -6088,6 +8995,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 		}
 		decl += "&";
 		decl += " ";
+		decl += to_restrict(name_id);
 		decl += to_expression(name_id);
 	}
 	else
@@ -6266,6 +9174,7 @@ void CompilerMSL::replace_illegal_names()
 		"M_2_SQRTPI",
 		"M_SQRT2",
 		"M_SQRT1_2",
+		"quad_broadcast",
 	};
 
 	static const unordered_set<string> illegal_func_names = {
@@ -6465,6 +9374,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 	// Pointer?
 	if (type.pointer)
 	{
+		const char *restrict_kw;
 		type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id));
 		switch (type.basetype)
 		{
@@ -6476,6 +9386,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 		default:
 			// Anything else can be a raw pointer.
 			type_name += "*";
+			restrict_kw = to_restrict(id);
+			if (*restrict_kw)
+			{
+				type_name += " ";
+				type_name += restrict_kw;
+			}
 			break;
 		}
 		return type_name;
@@ -6527,10 +9443,14 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 		type_name = "uint";
 		break;
 	case SPIRType::Int64:
-		type_name = "long"; // Currently unsupported
+		if (!msl_options.supports_msl_version(2, 2))
+			SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
+		type_name = "long";
 		break;
 	case SPIRType::UInt64:
-		type_name = "size_t";
+		if (!msl_options.supports_msl_version(2, 2))
+			SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
+		type_name = "ulong";
 		break;
 	case SPIRType::Half:
 		type_name = "half";
@@ -6748,6 +9668,234 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
 	return img_type_name;
 }
 
+void CompilerMSL::emit_subgroup_op(const Instruction &i)
+{
+	const uint32_t *ops = stream(i);
+	auto op = static_cast<Op>(i.op);
+
+	// Emits MSL for an OpGroupNonUniform* instruction. Metal 2.0 is required.
+	// iOS only supports quad ops. macOS only supports broadcast and shuffle on
+	// 10.13 (2.0), with full support in 10.14 (2.1). Note that iOS makes no
+	// distinction between a quad-group and a subgroup; all subgroups are quad-groups there.
+	if (!msl_options.supports_msl_version(2))
+		SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up.");
+
+	if (msl_options.is_ios())
+	{
+		switch (op)
+		{
+		default:
+			SPIRV_CROSS_THROW("iOS only supports quad-group operations.");
+		case OpGroupNonUniformBroadcast:
+		case OpGroupNonUniformShuffle:
+		case OpGroupNonUniformShuffleXor:
+		case OpGroupNonUniformShuffleUp:
+		case OpGroupNonUniformShuffleDown:
+		case OpGroupNonUniformQuadSwap:
+		case OpGroupNonUniformQuadBroadcast:
+			break;
+		}
+	}
+
+	if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
+	{
+		switch (op)
+		{
+		default:
+			SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.1 and up.");
+		case OpGroupNonUniformBroadcast:
+		case OpGroupNonUniformShuffle:
+		case OpGroupNonUniformShuffleXor:
+		case OpGroupNonUniformShuffleUp:
+		case OpGroupNonUniformShuffleDown:
+			break;
+		}
+	}
+
+	uint32_t result_type = ops[0];
+	uint32_t id = ops[1];
+
+	auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar());
+	if (scope != ScopeSubgroup)
+		SPIRV_CROSS_THROW("Only subgroup scope is supported.");
+
+	switch (op)
+	{
+	case OpGroupNonUniformElect:
+		emit_op(result_type, id, "simd_is_first()", true);
+		break;
+
+	case OpGroupNonUniformBroadcast:
+		emit_binary_func_op(result_type, id, ops[3], ops[4],
+		                    msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast");
+		break;
+
+	case OpGroupNonUniformBroadcastFirst:
+		emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first");
+		break;
+
+	case OpGroupNonUniformBallot:
+		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallot");
+		break;
+
+	case OpGroupNonUniformInverseBallot:
+		emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_invocation_id_id, "spvSubgroupBallotBitExtract");
+		break;
+
+	case OpGroupNonUniformBallotBitExtract:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBallotBitExtract");
+		break;
+
+	case OpGroupNonUniformBallotFindLSB:
+		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB");
+		break;
+
+	case OpGroupNonUniformBallotFindMSB:
+		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB");
+		break;
+
+	case OpGroupNonUniformBallotBitCount:
+	{
+		auto operation = static_cast<GroupOperation>(ops[3]);
+		if (operation == GroupOperationReduce)
+			emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount");
+		else if (operation == GroupOperationInclusiveScan)
+			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
+			                    "spvSubgroupBallotInclusiveBitCount");
+		else if (operation == GroupOperationExclusiveScan)
+			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
+			                    "spvSubgroupBallotExclusiveBitCount");
+		else
+			SPIRV_CROSS_THROW("Invalid BitCount operation.");
+		break;
+	}
+
+	case OpGroupNonUniformShuffle:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle");
+		break;
+
+	case OpGroupNonUniformShuffleXor:
+		emit_binary_func_op(result_type, id, ops[3], ops[4],
+		                    msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor");
+		break;
+
+	case OpGroupNonUniformShuffleUp:
+		emit_binary_func_op(result_type, id, ops[3], ops[4],
+		                    msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up");
+		break;
+
+	case OpGroupNonUniformShuffleDown:
+		emit_binary_func_op(result_type, id, ops[3], ops[4],
+		                    msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down");
+		break;
+
+	case OpGroupNonUniformAll:
+		emit_unary_func_op(result_type, id, ops[3], "simd_all");
+		break;
+
+	case OpGroupNonUniformAny:
+		emit_unary_func_op(result_type, id, ops[3], "simd_any");
+		break;
+
+	case OpGroupNonUniformAllEqual:
+		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupAllEqual");
+		break;
+
+		// clang-format off
+#define MSL_GROUP_OP(op, msl_op) \
+case OpGroupNonUniform##op: \
+	{ \
+		auto operation = static_cast<GroupOperation>(ops[3]); \
+		if (operation == GroupOperationReduce) \
+			emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \
+		else if (operation == GroupOperationInclusiveScan) \
+			emit_unary_func_op(result_type, id, ops[4], "simd_prefix_inclusive_" #msl_op); \
+		else if (operation == GroupOperationExclusiveScan) \
+			emit_unary_func_op(result_type, id, ops[4], "simd_prefix_exclusive_" #msl_op); \
+		else if (operation == GroupOperationClusteredReduce) \
+		{ \
+			/* Only cluster sizes of 4 are supported. */ \
+			uint32_t cluster_size = get<SPIRConstant>(ops[5]).scalar(); \
+			if (cluster_size != 4) \
+				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
+			emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \
+		} \
+		else \
+			SPIRV_CROSS_THROW("Invalid group operation."); \
+		break; \
+	}
+	MSL_GROUP_OP(FAdd, sum)
+	MSL_GROUP_OP(FMul, product)
+	MSL_GROUP_OP(IAdd, sum)
+	MSL_GROUP_OP(IMul, product)
+#undef MSL_GROUP_OP
+	// The others, unfortunately, don't support InclusiveScan or ExclusiveScan.
+#define MSL_GROUP_OP(op, msl_op) \
+case OpGroupNonUniform##op: \
+	{ \
+		auto operation = static_cast<GroupOperation>(ops[3]); \
+		if (operation == GroupOperationReduce) \
+			emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \
+		else if (operation == GroupOperationInclusiveScan) \
+			SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \
+		else if (operation == GroupOperationExclusiveScan) \
+			SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \
+		else if (operation == GroupOperationClusteredReduce) \
+		{ \
+			/* Only cluster sizes of 4 are supported. */ \
+			uint32_t cluster_size = get<SPIRConstant>(ops[5]).scalar(); \
+			if (cluster_size != 4) \
+				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
+			emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \
+		} \
+		else \
+			SPIRV_CROSS_THROW("Invalid group operation."); \
+		break; \
+	}
+	MSL_GROUP_OP(FMin, min)
+	MSL_GROUP_OP(FMax, max)
+	MSL_GROUP_OP(SMin, min)
+	MSL_GROUP_OP(SMax, max)
+	MSL_GROUP_OP(UMin, min)
+	MSL_GROUP_OP(UMax, max)
+	MSL_GROUP_OP(BitwiseAnd, and)
+	MSL_GROUP_OP(BitwiseOr, or)
+	MSL_GROUP_OP(BitwiseXor, xor)
+	MSL_GROUP_OP(LogicalAnd, and)
+	MSL_GROUP_OP(LogicalOr, or)
+	MSL_GROUP_OP(LogicalXor, xor)
+		// clang-format on
+
+	case OpGroupNonUniformQuadSwap:
+	{
+		// We can implement this easily based on the following table giving
+		// the target lane ID from the direction and current lane ID:
+		//        Direction
+		//      | 0 | 1 | 2 |
+		//   ---+---+---+---+
+		// L 0  | 1   2   3
+		// a 1  | 0   3   2
+		// n 2  | 3   0   1
+		// e 3  | 2   1   0
+		// Notice that target = source ^ (direction + 1).
+		uint32_t mask = get<SPIRConstant>(ops[4]).scalar() + 1;
+		uint32_t mask_id = ir.increase_bound_by(1);
+		set<SPIRConstant>(mask_id, expression_type_id(ops[4]), mask, false);
+		emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor");
+		break;
+	}
+
+	case OpGroupNonUniformQuadBroadcast:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast");
+		break;
+
+	default:
+		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
+	}
+
+	register_control_dependent_expression(id);
+}
+
 string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
 	if (out_type.basetype == in_type.basetype)
@@ -6807,6 +9955,7 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 	case BuiltInCullDistance:
 	case BuiltInLayer:
 	case BuiltInFragDepth:
+	case BuiltInFragStencilRefEXT:
 	case BuiltInSampleMask:
 		if (get_execution_model() == ExecutionModelTessellationControl)
 			break;
@@ -6815,6 +9964,12 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 
 		break;
 
+	case BuiltInBaryCoordNV:
+	case BuiltInBaryCoordNoPerspNV:
+		if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point))
+			return stage_in_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
+		break;
+
 	case BuiltInTessLevelOuter:
 		if (get_execution_model() == ExecutionModelTessellationEvaluation)
 		{
@@ -6879,7 +10034,14 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 	case BuiltInPointSize:
 		return "point_size";
 	case BuiltInPosition:
-		return "position";
+		if (position_invariant)
+		{
+			if (!msl_options.supports_msl_version(2, 1))
+				SPIRV_CROSS_THROW("Invariant position is only supported on MSL 2.1 and up.");
+			return "position, invariant";
+		}
+		else
+			return "position";
 	case BuiltInLayer:
 		return "render_target_array_index";
 	case BuiltInViewportIndex:
@@ -6900,6 +10062,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 			return "threadgroup_position_in_grid";
 		case ExecutionModelTessellationEvaluation:
 			return "patch_id";
+		case ExecutionModelFragment:
+			if (msl_options.is_ios())
+				SPIRV_CROSS_THROW("PrimitiveId is not supported in fragment on iOS.");
+			else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2.");
+			return "primitive_id";
 		default:
 			SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model.");
 		}
@@ -6928,6 +10096,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 	case BuiltInSamplePosition:
 		// Shouldn't be reached.
 		SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL.");
+	case BuiltInViewIndex:
+		if (execution.model != ExecutionModelFragment)
+			SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders.");
+		// The ViewIndex was implicitly used in the prior stages to set the render_target_array_index,
+		// so we can get it from there.
+		return "render_target_array_index";
 
 	// Fragment function out
 	case BuiltInFragDepth:
@@ -6938,6 +10112,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 		else
 			return "depth(any)";
 
+	case BuiltInFragStencilRefEXT:
+		return "stencil";
+
 	// Compute function in
 	case BuiltInGlobalInvocationId:
 		return "thread_position_in_grid";
@@ -6954,13 +10131,75 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 	case BuiltInLocalInvocationIndex:
 		return "thread_index_in_threadgroup";
 
+	case BuiltInSubgroupSize:
+		if (execution.model == ExecutionModelFragment)
+		{
+			if (!msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders.");
+			return "threads_per_simdgroup";
+		}
+		else
+		{
+			// thread_execution_width is an alias for threads_per_simdgroup. It has been available since 1.0,
+			// but not in fragment shaders.
+			return "thread_execution_width";
+		}
+
+	case BuiltInNumSubgroups:
+		if (!msl_options.supports_msl_version(2))
+			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+		return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup";
+
+	case BuiltInSubgroupId:
+		if (!msl_options.supports_msl_version(2))
+			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+		return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";
+
+	case BuiltInSubgroupLocalInvocationId:
+		if (execution.model == ExecutionModelFragment)
+		{
+			if (!msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders.");
+			return "thread_index_in_simdgroup";
+		}
+		else
+		{
+			if (!msl_options.supports_msl_version(2))
+				SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+			return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
+		}
+
+	case BuiltInSubgroupEqMask:
+	case BuiltInSubgroupGeMask:
+	case BuiltInSubgroupGtMask:
+	case BuiltInSubgroupLeMask:
+	case BuiltInSubgroupLtMask:
+		// Shouldn't be reached.
+		SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL.");
+
+	case BuiltInBaryCoordNV:
+		// TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3.
+		if (msl_options.is_ios())
+			SPIRV_CROSS_THROW("Barycentrics not supported on iOS.");
+		else if (!msl_options.supports_msl_version(2, 2))
+			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
+		return "barycentric_coord, center_perspective";
+
+	case BuiltInBaryCoordNoPerspNV:
+		// TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3.
+		if (msl_options.is_ios())
+			SPIRV_CROSS_THROW("Barycentrics not supported on iOS.");
+		else if (!msl_options.supports_msl_version(2, 2))
+			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
+		return "barycentric_coord, center_no_perspective";
+
 	default:
 		return "unsupported-built-in";
 	}
 }
 
 // Returns an MSL string type declaration for a SPIR-V builtin
-string CompilerMSL::builtin_type_decl(BuiltIn builtin)
+string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
 {
 	const SPIREntryPoint &execution = get_entry_point();
 	switch (builtin)
@@ -7030,11 +10269,24 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin)
 		return "uint";
 	case BuiltInSamplePosition:
 		return "float2";
+	case BuiltInViewIndex:
+		return "uint";
+
+	case BuiltInHelperInvocation:
+		return "bool";
+
+	case BuiltInBaryCoordNV:
+	case BuiltInBaryCoordNoPerspNV:
+		// Use the type as declared, can be 1, 2 or 3 components.
+		return type_to_glsl(get_variable_data_type(get<SPIRVariable>(id)));
 
 	// Fragment function out
 	case BuiltInFragDepth:
 		return "float";
 
+	case BuiltInFragStencilRefEXT:
+		return "uint";
+
 	// Compute function in
 	case BuiltInGlobalInvocationId:
 	case BuiltInLocalInvocationId:
@@ -7042,10 +10294,20 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin)
 	case BuiltInWorkgroupId:
 		return "uint3";
 	case BuiltInLocalInvocationIndex:
+	case BuiltInNumSubgroups:
+	case BuiltInSubgroupId:
+	case BuiltInSubgroupSize:
+	case BuiltInSubgroupLocalInvocationId:
 		return "uint";
+	case BuiltInSubgroupEqMask:
+	case BuiltInSubgroupGeMask:
+	case BuiltInSubgroupGtMask:
+	case BuiltInSubgroupLeMask:
+	case BuiltInSubgroupLtMask:
+		return "uint4";
 
-	case BuiltInHelperInvocation:
-		return "bool";
+	case BuiltInDeviceIndex:
+		return "int";
 
 	default:
 		return "unsupported-built-in-type";
@@ -7066,11 +10328,101 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
 	return bi_arg;
 }
 
-// Returns the byte size of a struct member.
-size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
+const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const
+{ // Returns the remapped physical type of member `index` if it has one, else its declared member type.
+	if (member_is_remapped_physical_type(type, index)) // Remapped: type ID is read from the PhysicalTypeID decoration.
+		return get<SPIRType>(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID));
+	else // Not remapped: use the type listed in member_types as-is.
+		return get<SPIRType>(type.member_types[index]);
+}
+
+uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const
+{
+	// Computes the MSL array stride of an array type. Array stride in MSL is always size * array_size.
+	// sizeof(float3) == 16, unlike GLSL and HLSL where array stride would be 16 and size 12.
+
+	// We could use the parent type here and recurse, but that makes creating physical type remappings
+	// far more complicated. We'd rather just create the final type, and ignore having to create the entire type
+	// hierarchy in order to compute this value, so make a temporary type on the stack.
+
+	auto basic_type = type; // Local copy; the caller's type is not modified.
+	basic_type.array.clear(); // Drop every array dimension so only the element type remains.
+	basic_type.array_size_literal.clear();
+	uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major);
+
+	uint32_t dimensions = uint32_t(type.array.size());
+	assert(dimensions > 0); // A stride is only requested for array types.
+	dimensions--; // The last entry of type.array is left out of the product below.
+
+	// Multiply together every dimension, except the last one.
+	for (uint32_t dim = 0; dim < dimensions; dim++)
+	{
+		uint32_t array_size = to_array_size_literal(type, dim);
+		value_size *= max(array_size, 1u); // A literal size of zero is counted as one element.
+	}
+
+	return value_size;
+}
+
+uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const
+{ // Array stride of struct member `index`: forwards its physical type, packed flag and RowMajor decoration.
+	return get_declared_type_array_stride_msl(get_physical_member_type(type, index),
+	                                          member_is_packed_physical_type(type, index),
+	                                          has_member_decoration(type.self, index, DecorationRowMajor));
+}
+
+uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const
+{
+	// Returns the matrix stride of a type. For packed matrices, we just use the size of the vector type.
+	// Otherwise, MatrixStride == alignment, which is the size of the underlying vector type.
+	if (packed)
+		return (type.width / 8) * (row_major ? type.columns : type.vecsize); // width / 8 times columns or vecsize.
+	else
+		return get_declared_type_alignment_msl(type, false, row_major); // Always queried as unpacked here.
+}
+
+uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const
 {
-	auto &type = get<SPIRType>(struct_type.member_types[index]);
+	return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index), // remapped type if any
+	                                           member_is_packed_physical_type(type, index), // passed as `packed`
+	                                           has_member_decoration(type.self, index, DecorationRowMajor)); // `row_major`
+}
+
+uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment,
+                                                   bool ignore_padding) const
+{ // Returns the MSL size of a struct: its padding target if set, else the end of the last member, aligned up.
+	// If we have a target size, that is the declared size as well.
+	if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget))
+		return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget);
+
+	if (struct_type.member_types.empty())
+		return 0; // No members, so there is nothing to measure.
+
+	uint32_t mbr_cnt = uint32_t(struct_type.member_types.size());
+
+	// In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
+	uint32_t alignment = 1;
+
+	if (!ignore_alignment) // When ignored, alignment stays 1 and the round-up at the end changes nothing.
+	{
+		for (uint32_t i = 0; i < mbr_cnt; i++)
+		{
+			uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i);
+			alignment = max(alignment, mbr_alignment);
+		}
+	}
+
+	// Last member will always be matched to the final Offset decoration, but size of struct in MSL now depends
+	// on physical size in MSL, and the size of the struct itself is then aligned to struct alignment.
+	uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1);
+	uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1);
+	msl_size = (msl_size + alignment - 1) & ~(alignment - 1); // Round up; the mask needs a power-of-two alignment.
+	return msl_size;
+}
 
+// Returns the byte size of a type.
+uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const
+{
 	switch (type.basetype)
 	{
 	case SPIRType::Unknown:
@@ -7083,40 +10435,47 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
 
 	default:
 	{
-		// For arrays, we can use ArrayStride to get an easy check.
-		// Runtime arrays will have zero size so force to min of one.
 		if (!type.array.empty())
 		{
 			uint32_t array_size = to_array_size_literal(type);
-			return type_struct_member_array_stride(struct_type, index) * max(array_size, 1u);
+			return get_declared_type_array_stride_msl(type, is_packed, row_major) * max(array_size, 1u);
 		}
 
 		if (type.basetype == SPIRType::Struct)
+			return get_declared_struct_size_msl(type);
+
+		if (is_packed)
 		{
-			// The size of a struct in Metal is aligned up to its natural alignment.
-			auto size = get_declared_struct_size(type);
-			auto alignment = get_declared_struct_member_alignment(struct_type, index);
-			return (size + alignment - 1) & ~(alignment - 1);
+			return type.vecsize * type.columns * (type.width / 8);
 		}
+		else
+		{
+			// An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
+			uint32_t vecsize = type.vecsize;
+			uint32_t columns = type.columns;
 
-		uint32_t component_size = type.width / 8;
-		uint32_t vecsize = type.vecsize;
-		uint32_t columns = type.columns;
+			if (row_major)
+				swap(vecsize, columns);
 
-		// An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
-		if (vecsize == 3 && !has_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPacked))
-			vecsize = 4;
+			if (vecsize == 3)
+				vecsize = 4;
 
-		return component_size * vecsize * columns;
+			return vecsize * columns * (type.width / 8);
+		}
 	}
 	}
 }
 
-// Returns the byte alignment of a struct member.
-size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const
+uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const
 {
-	auto &type = get<SPIRType>(struct_type.member_types[index]);
+	return get_declared_type_size_msl(get_physical_member_type(type, index), // remapped type if any
+	                                  member_is_packed_physical_type(type, index), // passed as `is_packed`
+	                                  has_member_decoration(type.self, index, DecorationRowMajor)); // `row_major`
+}
 
+// Returns the byte alignment of a type.
+uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const
+{
 	switch (type.basetype)
 	{
 	case SPIRType::Unknown:
@@ -7127,12 +10486,19 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
 	case SPIRType::Sampler:
 		SPIRV_CROSS_THROW("Querying alignment of opaque object.");
 
+	case SPIRType::Int64:
+		SPIRV_CROSS_THROW("long types are not supported in buffers in MSL.");
+	case SPIRType::UInt64:
+		SPIRV_CROSS_THROW("ulong types are not supported in buffers in MSL.");
+	case SPIRType::Double:
+		SPIRV_CROSS_THROW("double types are not supported in buffers in MSL.");
+
 	case SPIRType::Struct:
 	{
 		// In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
 		uint32_t alignment = 1;
 		for (uint32_t i = 0; i < type.member_types.size(); i++)
-			alignment = max(alignment, uint32_t(get_declared_struct_member_alignment(type, i)));
+			alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i)));
 		return alignment;
 	}
 
@@ -7141,25 +10507,28 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
 		// Alignment of packed type is the same as the underlying component or column size.
 		// Alignment of unpacked type is the same as the vector size.
 		// Alignment of 3-elements vector is the same as 4-elements (including packed using column).
-		if (member_is_packed_type(struct_type, index))
-		{
-			// This is getting pretty complicated.
-			// The special case of array of float/float2 needs to be handled here.
-			uint32_t packed_type_id =
-			    get_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPackedType);
-			const SPIRType *packed_type = packed_type_id != 0 ? &get<SPIRType>(packed_type_id) : nullptr;
-			if (packed_type && is_array(*packed_type) && !is_matrix(*packed_type) &&
-			    packed_type->basetype != SPIRType::Struct)
-				return (packed_type->width / 8) * 4;
-			else
-				return (type.width / 8) * (type.columns == 3 ? 4 : type.columns);
+		if (is_packed)
+		{
+			// If we have packed_T and friends, the alignment is always scalar.
+			return type.width / 8;
 		}
 		else
-			return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize);
+		{
+			// This is the general rule for MSL. Size == alignment.
+			uint32_t vecsize = row_major ? type.columns : type.vecsize;
+			return (type.width / 8) * (vecsize == 3 ? 4 : vecsize);
+		}
 	}
 	}
 }
 
+uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const
+{ // Alignment of struct member `index`: forwards its physical type, packed flag and RowMajor decoration.
+	return get_declared_type_alignment_msl(get_physical_member_type(type, index),
+	                                       member_is_packed_physical_type(type, index),
+	                                       has_member_decoration(type.self, index, DecorationRowMajor));
+}
+
 bool CompilerMSL::skip_argument(uint32_t) const
 {
 	return false;
@@ -7207,7 +10576,7 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar
 	case OpImageDrefGather:
 		compiler.has_sampled_images =
 		    compiler.has_sampled_images || compiler.is_sampled_image_type(compiler.expression_type(args[2]));
-		compiler.needs_aux_buffer_def = compiler.needs_aux_buffer_def || compiler.has_sampled_images;
+		compiler.needs_swizzle_buffer_def = compiler.needs_swizzle_buffer_def || compiler.has_sampled_images;
 		break;
 	default:
 		break;
@@ -7215,6 +10584,17 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar
 	return true;
 }
 
+// If a needed custom function wasn't added before, add it and force a recompile. Otherwise this does nothing.
+void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func)
+{
+	if (spv_function_implementations.count(spv_func) == 0) // Only act the first time this function is requested.
+	{
+		spv_function_implementations.insert(spv_func);
+		suppress_missing_prototypes = true;
+		force_recompile(); // Reached only on first insertion, so repeated requests do not trigger it again.
+	}
+}
+
 bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length)
 {
 	// Since MSL exists in a single execution scope, function prototype declarations are not
@@ -7267,6 +10647,37 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
 		uses_atomics = true;
 		break;
 
+	case OpGroupNonUniformInverseBallot:
+		needs_subgroup_invocation_id = true;
+		break;
+
+	case OpGroupNonUniformBallotBitCount:
+		if (args[3] != GroupOperationReduce)
+			needs_subgroup_invocation_id = true;
+		break;
+
+	case OpArrayLength:
+	{
+		auto *var = compiler.maybe_get_backing_variable(args[2]);
+		if (var)
+			compiler.buffers_requiring_array_length.insert(var->self);
+		break;
+	}
+
+	case OpInBoundsAccessChain:
+	case OpAccessChain:
+	case OpPtrAccessChain:
+	{
+		// OpArrayLength might want to know if taking ArrayLength of an array of SSBOs.
+		uint32_t result_type = args[0];
+		uint32_t id = args[1];
+		uint32_t ptr = args[2];
+		compiler.set<SPIRExpression>(id, "", result_type, true);
+		compiler.register_read(id, ptr, true);
+		compiler.ir.ids[id].set_allow_type_rewrite();
+		break;
+	}
+
 	default:
 		break;
 	}
@@ -7361,32 +10772,6 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 		uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]];
 		if (tid && compiler.get<SPIRType>(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native)
 			return SPVFuncImplTexelBufferCoords;
-
-		if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples)
-			return SPVFuncImplTextureSwizzle;
-
-		break;
-	}
-
-	case OpImageSampleExplicitLod:
-	case OpImageSampleProjExplicitLod:
-	case OpImageSampleDrefExplicitLod:
-	case OpImageSampleProjDrefExplicitLod:
-	case OpImageSampleImplicitLod:
-	case OpImageSampleProjImplicitLod:
-	case OpImageSampleDrefImplicitLod:
-	case OpImageSampleProjDrefImplicitLod:
-	case OpImageGather:
-	case OpImageDrefGather:
-		if (compiler.msl_options.swizzle_texture_samples)
-			return SPVFuncImplTextureSwizzle;
-		break;
-
-	case OpCompositeConstruct:
-	{
-		auto &type = compiler.get<SPIRType>(args[0]);
-		if (type.array.size() > 1) // We need to use copies to build the composite.
-			return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size() - 1);
 		break;
 	}
 
@@ -7395,7 +10780,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 		uint32_t extension_set = args[2];
 		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
 		{
-			GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]);
+			auto op_450 = static_cast<GLSLstd450>(args[3]);
 			switch (op_450)
 			{
 			case GLSLstd450Radians:
@@ -7410,6 +10795,27 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 				return SPVFuncImplFindUMsb;
 			case GLSLstd450SSign:
 				return SPVFuncImplSSign;
+			case GLSLstd450Reflect:
+			{
+				auto &type = compiler.get<SPIRType>(args[0]);
+				if (type.vecsize == 1)
+					return SPVFuncImplReflectScalar;
+				break;
+			}
+			case GLSLstd450Refract:
+			{
+				auto &type = compiler.get<SPIRType>(args[0]);
+				if (type.vecsize == 1)
+					return SPVFuncImplRefractScalar;
+				break;
+			}
+			case GLSLstd450FaceForward:
+			{
+				auto &type = compiler.get<SPIRType>(args[0]);
+				if (type.vecsize == 1)
+					return SPVFuncImplFaceForwardScalar;
+				break;
+			}
 			case GLSLstd450MatrixInverse:
 			{
 				auto &mat_type = compiler.get<SPIRType>(args[0]);
@@ -7433,6 +10839,25 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 		break;
 	}
 
+	case OpGroupNonUniformBallot:
+		return SPVFuncImplSubgroupBallot;
+
+	case OpGroupNonUniformInverseBallot:
+	case OpGroupNonUniformBallotBitExtract:
+		return SPVFuncImplSubgroupBallotBitExtract;
+
+	case OpGroupNonUniformBallotFindLSB:
+		return SPVFuncImplSubgroupBallotFindLSB;
+
+	case OpGroupNonUniformBallotFindMSB:
+		return SPVFuncImplSubgroupBallotFindMSB;
+
+	case OpGroupNonUniformBallotBitCount:
+		return SPVFuncImplSubgroupBallotBitCount;
+
+	case OpGroupNonUniformAllEqual:
+		return SPVFuncImplSubgroupAllEqual;
+
 	default:
 		break;
 	}
@@ -7497,14 +10922,20 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa)
 	meta.members.resize(max(type.member_types.size(), meta.members.size()));
 }
 
-void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler)
+void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler)
 {
 	auto &type = get<SPIRType>(get<SPIRVariable>(id).basetype);
 	if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler)
 		SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type.");
 	if (!type.array.empty())
 		SPIRV_CROSS_THROW("Can not remap array of samplers.");
-	constexpr_samplers[id] = sampler;
+	constexpr_samplers_by_id[id] = sampler; // Stored under the variable ID, after the type checks above have passed.
+}
+
+void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding,
+                                                     const MSLConstexprSampler &sampler)
+{ // Registers `sampler` under a (desc_set, binding) key; no variable or type is validated here.
+	constexpr_samplers_by_binding[{ desc_set, binding }] = sampler;
 }
 
 void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
@@ -7529,6 +10960,15 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
 	case BuiltInNumWorkgroups:
 	case BuiltInLayer:
 	case BuiltInViewportIndex:
+	case BuiltInFragStencilRefEXT:
+	case BuiltInPrimitiveId:
+	case BuiltInSubgroupSize:
+	case BuiltInSubgroupLocalInvocationId:
+	case BuiltInViewIndex:
+	case BuiltInVertexIndex:
+	case BuiltInInstanceIndex:
+	case BuiltInBaseInstance:
+	case BuiltInBaseVertex:
 		expected_type = SPIRType::UInt;
 		break;
 
@@ -7569,6 +11009,9 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
 	{
 	case BuiltInLayer:
 	case BuiltInViewportIndex:
+	case BuiltInFragStencilRefEXT:
+	case BuiltInPrimitiveId:
+	case BuiltInViewIndex:
 		expected_type = SPIRType::UInt;
 		break;
 
@@ -7639,9 +11082,14 @@ void CompilerMSL::analyze_argument_buffers()
 		string name;
 		SPIRType::BaseType basetype;
 		uint32_t index;
+		uint32_t plane;
 	};
 	SmallVector<Resource> resources_in_set[kMaxArgumentBuffers];
 
+	bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {};
+	bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {};
+	bool needs_buffer_sizes = false;
+
 	ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &var) {
 		if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
 		     var.storage == StorageClassStorageBuffer) &&
@@ -7658,36 +11106,116 @@ void CompilerMSL::analyze_argument_buffers()
 			if (desc_set >= kMaxArgumentBuffers)
 				SPIRV_CROSS_THROW("Descriptor set index is out of range.");
 
+			const MSLConstexprSampler *constexpr_sampler = nullptr;
+			if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
+			{
+				constexpr_sampler = find_constexpr_sampler(var_id);
+				if (constexpr_sampler)
+				{
+					// Mark this ID as a constexpr sampler for later in case it came from set/bindings.
+					constexpr_samplers_by_id[var_id] = *constexpr_sampler;
+				}
+			}
+
 			if (type.basetype == SPIRType::SampledImage)
 			{
 				add_resource_name(var_id);
 
-				uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image);
-				uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler);
-
-				// Avoid trivial conflicts where we didn't remap.
-				// This will let us at least compile test cases without having to instrument remaps.
-				if (sampler_resource_index == image_resource_index)
-					sampler_resource_index += type.array.empty() ? 1 : to_array_size_literal(type);
+				uint32_t plane_count = 1;
+				if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+					plane_count = constexpr_sampler->planes;
 
-				resources_in_set[desc_set].push_back({ &var, to_name(var_id), SPIRType::Image, image_resource_index });
+				for (uint32_t i = 0; i < plane_count; i++)
+				{
+					uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i);
+					resources_in_set[desc_set].push_back(
+					    { &var, to_name(var_id), SPIRType::Image, image_resource_index, i });
+				}
 
-				if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0)
+				if (type.image.dim != DimBuffer && !constexpr_sampler)
 				{
+					uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler);
 					resources_in_set[desc_set].push_back(
-					    { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index });
+					    { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 });
 				}
 			}
-			else if (constexpr_samplers.count(var_id) == 0)
+			else if (!constexpr_sampler)
 			{
 				// constexpr samplers are not declared as resources.
-				add_resource_name(var_id);
-				resources_in_set[desc_set].push_back(
-				    { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) });
+				if (!msl_options.is_ios() || type.basetype != SPIRType::Image || type.image.sampled != 2)
+				{
+					add_resource_name(var_id);
+					resources_in_set[desc_set].push_back(
+					    { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 });
+				}
+			}
+
+			// Check if this descriptor set needs a swizzle buffer.
+			if (needs_swizzle_buffer_def && is_sampled_image_type(type))
+				set_needs_swizzle_buffer[desc_set] = true;
+			else if (buffers_requiring_array_length.count(var_id) != 0)
+			{
+				set_needs_buffer_sizes[desc_set] = true;
+				needs_buffer_sizes = true;
 			}
 		}
 	});
 
+	if (needs_swizzle_buffer_def || needs_buffer_sizes)
+	{
+		uint32_t uint_ptr_type_id = 0;
+
+		// We might have to add a swizzle buffer resource to the set.
+		for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
+		{
+			if (!set_needs_swizzle_buffer[desc_set] && !set_needs_buffer_sizes[desc_set])
+				continue;
+
+			if (uint_ptr_type_id == 0)
+			{
+				uint32_t offset = ir.increase_bound_by(2);
+				uint32_t type_id = offset;
+				uint_ptr_type_id = offset + 1;
+
+				// Create a buffer to hold extra data, including the swizzle constants.
+				SPIRType uint_type;
+				uint_type.basetype = SPIRType::UInt;
+				uint_type.width = 32;
+				set<SPIRType>(type_id, uint_type);
+
+				SPIRType uint_type_pointer = uint_type;
+				uint_type_pointer.pointer = true;
+				uint_type_pointer.pointer_depth = 1;
+				uint_type_pointer.parent_type = type_id;
+				uint_type_pointer.storage = StorageClassUniform;
+				set<SPIRType>(uint_ptr_type_id, uint_type_pointer);
+				set_decoration(uint_ptr_type_id, DecorationArrayStride, 4);
+			}
+
+			if (set_needs_swizzle_buffer[desc_set])
+			{
+				uint32_t var_id = ir.increase_bound_by(1);
+				auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
+				set_name(var_id, "spvSwizzleConstants");
+				set_decoration(var_id, DecorationDescriptorSet, desc_set);
+				set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding);
+				resources_in_set[desc_set].push_back(
+				    { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
+			}
+
+			if (set_needs_buffer_sizes[desc_set])
+			{
+				uint32_t var_id = ir.increase_bound_by(1);
+				auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
+				set_name(var_id, "spvBufferSizeConstants");
+				set_decoration(var_id, DecorationDescriptorSet, desc_set);
+				set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding);
+				resources_in_set[desc_set].push_back(
+				    { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
+			}
+		}
+	}
+
 	for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
 	{
 		auto &resources = resources_in_set[desc_set];
@@ -7702,8 +11230,20 @@ void CompilerMSL::analyze_argument_buffers()
 		argument_buffer_ids[desc_set] = next_id;
 
 		auto &buffer_type = set<SPIRType>(type_id);
-		buffer_type.storage = StorageClassUniform;
+
 		buffer_type.basetype = SPIRType::Struct;
+
+		if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0)
+		{
+			buffer_type.storage = StorageClassStorageBuffer;
+			// Make sure the argument buffer gets marked as const device.
+			set_decoration(next_id, DecorationNonWritable);
+			// Need to mark the type as a Block to enable this.
+			set_decoration(type_id, DecorationBlock);
+		}
+		else
+			buffer_type.storage = StorageClassUniform;
+
 		set_name(type_id, join("spvDescriptorSetBuffer", desc_set));
 
 		auto &ptr_type = set<SPIRType>(ptr_type_id);
@@ -7727,6 +11267,8 @@ void CompilerMSL::analyze_argument_buffers()
 			auto &var = *resource.var;
 			auto &type = get_variable_data_type(var);
 			string mbr_name = ensure_valid_name(resource.name, "m");
+			if (resource.plane > 0)
+				mbr_name += join(plane_name_suffix, resource.plane);
 			set_member_name(buffer_type.self, member_index, mbr_name);
 
 			if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler)
@@ -7754,12 +11296,22 @@ void CompilerMSL::analyze_argument_buffers()
 			}
 			else
 			{
+				uint32_t binding = get_decoration(var.self, DecorationBinding);
+				SetBindingPair pair = { desc_set, binding };
+
 				if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
 				    resource.basetype == SPIRType::SampledImage)
 				{
 					// Drop pointer information when we emit the resources into a struct.
 					buffer_type.member_types.push_back(get_variable_data_type_id(var));
-					set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
+					if (resource.plane == 0)
+						set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
+				}
+				else if (buffers_requiring_dynamic_offset.count(pair))
+				{
+					// Don't set the qualified name here; we'll define a variable holding the corrected buffer address later.
+					buffer_type.member_types.push_back(var.basetype);
+					buffers_requiring_dynamic_offset[pair].second = var.self;
 				}
 				else
 				{
@@ -7772,7 +11324,7 @@ void CompilerMSL::analyze_argument_buffers()
 				}
 			}
 
-			set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationArgumentBufferID,
+			set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationResourceIndexPrimary,
 			                               resource.index);
 			set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID,
 			                               var.self);
@@ -7780,3 +11332,35 @@ void CompilerMSL::analyze_argument_buffers()
 		}
 	}
 }
+
+bool CompilerMSL::SetBindingPair::operator==(const SetBindingPair &other) const
+{
+	return desc_set == other.desc_set && binding == other.binding;
+}
+
+bool CompilerMSL::SetBindingPair::operator<(const SetBindingPair &other) const
+{
+	return desc_set < other.desc_set || (desc_set == other.desc_set && binding < other.binding);
+}
+
+bool CompilerMSL::StageSetBinding::operator==(const StageSetBinding &other) const
+{
+	return model == other.model && desc_set == other.desc_set && binding == other.binding;
+}
+
+size_t CompilerMSL::InternalHasher::operator()(const SetBindingPair &value) const
+{
+	// Quality of hash doesn't really matter here.
+	auto hash_set = std::hash<uint32_t>()(value.desc_set);
+	auto hash_binding = std::hash<uint32_t>()(value.binding);
+	return (hash_set * 0x10001b31) ^ hash_binding;
+}
+
+size_t CompilerMSL::InternalHasher::operator()(const StageSetBinding &value) const
+{
+	// Quality of hash doesn't really matter here.
+	auto hash_model = std::hash<uint32_t>()(value.model);
+	auto hash_set = std::hash<uint32_t>()(value.desc_set);
+	auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set;
+	return (tmp_hash * 0x10001b31) ^ value.binding;
+}
diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp
index 8d3a8ad..d16b593 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp
@@ -20,6 +20,7 @@
 #include "spirv_glsl.hpp"
 #include <map>
 #include <set>
+#include <stddef.h>
 #include <unordered_map>
 #include <unordered_set>
 
@@ -53,9 +54,9 @@ struct MSLVertexAttr
 // Matches the binding index of a MSL resource for a binding within a descriptor set.
 // Taken together, the stage, desc_set and binding combine to form a reference to a resource
 // descriptor used in a particular shading stage.
-// If using MSL 2.0 argument buffers, and the descriptor set is not marked as a discrete descriptor set,
-// the binding reference we remap to will become an [[id(N)]] attribute within
-// the "descriptor set" argument buffer structure.
+// If MSL 2.0 argument buffers are in use, the descriptor set is not marked as a discrete descriptor set,
+// and (for iOS only) the resource is not a storage image (sampled != 2), then the binding reference we
+// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure.
 // For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a
 // [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used.
 struct MSLResourceBinding
@@ -121,6 +122,50 @@ enum MSLSamplerBorderColor
 	MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff
 };
 
+enum MSLFormatResolution
+{
+	MSL_FORMAT_RESOLUTION_444 = 0,
+	MSL_FORMAT_RESOLUTION_422,
+	MSL_FORMAT_RESOLUTION_420,
+	MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff
+};
+
+enum MSLChromaLocation
+{
+	MSL_CHROMA_LOCATION_COSITED_EVEN = 0,
+	MSL_CHROMA_LOCATION_MIDPOINT,
+	MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff
+};
+
+enum MSLComponentSwizzle
+{
+	MSL_COMPONENT_SWIZZLE_IDENTITY = 0,
+	MSL_COMPONENT_SWIZZLE_ZERO,
+	MSL_COMPONENT_SWIZZLE_ONE,
+	MSL_COMPONENT_SWIZZLE_R,
+	MSL_COMPONENT_SWIZZLE_G,
+	MSL_COMPONENT_SWIZZLE_B,
+	MSL_COMPONENT_SWIZZLE_A,
+	MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff
+};
+
+enum MSLSamplerYCbCrModelConversion
+{
+	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0,
+	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY,
+	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709,
+	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601,
+	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020,
+	MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff
+};
+
+enum MSLSamplerYCbCrRange
+{
+	MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0,
+	MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW,
+	MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff
+};
+
 struct MSLConstexprSampler
 {
 	MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED;
@@ -136,13 +181,40 @@ struct MSLConstexprSampler
 	float lod_clamp_max = 1000.0f;
 	int max_anisotropy = 1;
 
+	// Sampler Y'CbCr conversion parameters
+	uint32_t planes = 0;
+	MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444;
+	MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST;
+	MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN;
+	MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN;
+	MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY
+	MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY;
+	MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL;
+	uint32_t bpc = 8;
+
 	bool compare_enable = false;
 	bool lod_clamp_enable = false;
 	bool anisotropy_enable = false;
-};
+	bool ycbcr_conversion_enable = false;
 
-// Tracks the type ID and member index of a struct member
-using MSLStructMemberKey = uint64_t;
+	MSLConstexprSampler()
+	{
+		for (uint32_t i = 0; i < 4; i++)
+			swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY;
+	}
+	bool swizzle_is_identity() const
+	{
+		return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY &&
+		        swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY);
+	}
+	bool swizzle_has_one_or_zero() const
+	{
+		return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE ||
+		        swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE ||
+		        swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE ||
+		        swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE);
+	}
+};
 
 // Special constant used in a MSLResourceBinding desc_set
 // element to indicate the bindings for the push constants.
@@ -152,11 +224,21 @@ static const uint32_t kPushConstDescSet = ~(0u);
 // element to indicate the bindings for the push constants.
 static const uint32_t kPushConstBinding = 0;
 
-static const uint32_t kMaxArgumentBuffers = 8;
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding for swizzle buffers.
+static const uint32_t kSwizzleBufferBinding = ~(1u);
+
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding for buffer size buffers to support OpArrayLength.
+static const uint32_t kBufferSizeBufferBinding = ~(2u);
 
-// The current version of the aux buffer structure. It must be incremented any time a
-// new field is added to the aux buffer.
-#define SPIRV_CROSS_MSL_AUX_BUFFER_STRUCT_VERSION 1
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding used for the argument buffer itself.
+// This buffer binding should be kept as small as possible as all automatic bindings for buffers
+// will start at max(kArgumentBufferBinding) + 1.
+static const uint32_t kArgumentBufferBinding = ~(3u);
+
+static const uint32_t kMaxArgumentBuffers = 8;
 
 // Decompiles SPIR-V to Metal Shading Language
 class CompilerMSL : public CompilerGLSL
@@ -174,17 +256,24 @@ public:
 		Platform platform = macOS;
 		uint32_t msl_version = make_msl_version(1, 2);
 		uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers
-		uint32_t aux_buffer_index = 30;
+		uint32_t swizzle_buffer_index = 30;
 		uint32_t indirect_params_buffer_index = 29;
 		uint32_t shader_output_buffer_index = 28;
 		uint32_t shader_patch_output_buffer_index = 27;
 		uint32_t shader_tess_factor_buffer_index = 26;
+		uint32_t buffer_size_buffer_index = 25;
+		uint32_t view_mask_buffer_index = 24;
+		uint32_t dynamic_offsets_buffer_index = 23;
 		uint32_t shader_input_wg_index = 0;
+		uint32_t device_index = 0;
 		bool enable_point_size_builtin = true;
 		bool disable_rasterization = false;
 		bool capture_output_to_buffer = false;
 		bool swizzle_texture_samples = false;
 		bool tess_domain_origin_lower_left = false;
+		bool multiview = false;
+		bool view_index_from_device_index = false;
+		bool dispatch_base = false;
 
 		// Enable use of MSL 2.0 indirect argument buffers.
 		// MSL 2.0 must also be enabled.
@@ -212,7 +301,7 @@ public:
 			msl_version = make_msl_version(major, minor, patch);
 		}
 
-		bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
+		bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const
 		{
 			return msl_version >= make_msl_version(major, minor, patch);
 		}
@@ -243,31 +332,52 @@ public:
 	}
 
 	// Provide feedback to calling API to allow it to pass an auxiliary
-	// buffer if the shader needs it.
-	bool needs_aux_buffer() const
+	// swizzle buffer if the shader needs it.
+	bool needs_swizzle_buffer() const
+	{
+		return used_swizzle_buffer;
+	}
+
+	// Provide feedback to calling API to allow it to pass a buffer
+	// containing STORAGE_BUFFER buffer sizes to support OpArrayLength.
+	bool needs_buffer_size_buffer() const
+	{
+		return !buffers_requiring_array_length.empty();
+	}
+
+	// Provide feedback to calling API to allow it to pass a buffer
+	// containing the view mask for the current multiview subpass.
+	bool needs_view_mask_buffer() const
 	{
-		return used_aux_buffer;
+		return msl_options.multiview && !msl_options.view_index_from_device_index;
+	}
+
+	// Provide feedback to calling API to allow it to pass a buffer
+	// containing the dispatch base workgroup ID.
+	bool needs_dispatch_base_buffer() const
+	{
+		return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2);
 	}
 
 	// Provide feedback to calling API to allow it to pass an output
 	// buffer if the shader needs it.
 	bool needs_output_buffer() const
 	{
-		return capture_output_to_buffer && stage_out_var_id != 0;
+		return capture_output_to_buffer && stage_out_var_id != ID(0);
 	}
 
 	// Provide feedback to calling API to allow it to pass a patch output
 	// buffer if the shader needs it.
 	bool needs_patch_output_buffer() const
 	{
-		return capture_output_to_buffer && patch_stage_out_var_id != 0;
+		return capture_output_to_buffer && patch_stage_out_var_id != ID(0);
 	}
 
 	// Provide feedback to calling API to allow it to pass an input threadgroup
 	// buffer if the shader needs it.
 	bool needs_input_threadgroup_mem() const
 	{
-		return capture_output_to_buffer && stage_in_var_id != 0;
+		return capture_output_to_buffer && stage_in_var_id != ID(0);
 	}
 
 	explicit CompilerMSL(std::vector<uint32_t> spirv);
@@ -288,14 +398,52 @@ public:
 	// the set/binding combination was used by the MSL code.
 	void add_msl_resource_binding(const MSLResourceBinding &resource);
 
+	// desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource
+	// in this shader. index is the index within the dynamic offset buffer to use. This
+	// function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC
+	// or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers
+	// are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with
+	// an offset taken from the dynamic offset buffer.
+	void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index);
+
 	// When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets.
 	// This corresponds to VK_KHR_push_descriptor in Vulkan.
 	void add_discrete_descriptor_set(uint32_t desc_set);
 
+	// If an argument buffer is large enough, it may need to be in the device storage space rather than
+	// constant. Opt-in to this behavior here on a per set basis.
+	void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage);
+
 	// Query after compilation is done. This allows you to check if a location or set/binding combination was used by the shader.
 	bool is_msl_vertex_attribute_used(uint32_t location);
+
+	// NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here.
+	// Constexpr samplers are always assumed to be emitted.
+	// No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped
+	// by remap_constexpr_sampler(_by_binding).
 	bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding);
 
+	// This must only be called after a successful call to CompilerMSL::compile().
+	// For a variable resource ID obtained through reflection API, report the automatically assigned resource index.
+	// If the descriptor set was part of an argument buffer, report the [[id(N)]];
+	// for other resources, report the [[buffer/texture/sampler]] binding.
+	// If the resource was a combined image sampler, report the image binding here,
+	// use the _secondary version of this call to query the sampler half of the resource.
+	// If no binding exists, uint32_t(-1) is returned.
+	uint32_t get_automatic_msl_resource_binding(uint32_t id) const;
+
+	// Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the
+	// sampler's binding is returned instead. For any other resource type, -1 is returned.
+	uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const;
+
+	// Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images,
+	// in which case the second plane's binding is returned instead. For any other resource type, -1 is returned.
+	uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const;
+
+	// Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images,
+	// in which case the third plane's binding is returned instead. For any other resource type, -1 is returned.
+	uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const;
+
 	// Compiles the SPIR-V code into Metal Shading Language.
 	std::string compile() override;
 
@@ -305,7 +453,12 @@ public:
 	// The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler.
 	// This can be used on both combined image/samplers (sampler2D) or standalone samplers.
 	// The remapped sampler must not be an array of samplers.
-	void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler);
+	// Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyway.
+	void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler);
+
+	// Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID.
+	// Remaps based on ID take priority over set/binding remaps.
+	void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler);
 
 	// If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect
 	// to use for a particular location. The default is 4 if number of components is not overridden.
@@ -337,13 +490,44 @@ protected:
 		SPVFuncImplInverse4x4,
 		SPVFuncImplInverse3x3,
 		SPVFuncImplInverse2x2,
-		SPVFuncImplRowMajor2x3,
-		SPVFuncImplRowMajor2x4,
-		SPVFuncImplRowMajor3x2,
-		SPVFuncImplRowMajor3x4,
-		SPVFuncImplRowMajor4x2,
-		SPVFuncImplRowMajor4x3,
+		// It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's
+		// emitted before them.
+		SPVFuncImplForwardArgs,
+		// Likewise, this must come before *Swizzle.
+		SPVFuncImplGetSwizzle,
 		SPVFuncImplTextureSwizzle,
+		SPVFuncImplGatherSwizzle,
+		SPVFuncImplGatherCompareSwizzle,
+		SPVFuncImplSubgroupBallot,
+		SPVFuncImplSubgroupBallotBitExtract,
+		SPVFuncImplSubgroupBallotFindLSB,
+		SPVFuncImplSubgroupBallotFindMSB,
+		SPVFuncImplSubgroupBallotBitCount,
+		SPVFuncImplSubgroupAllEqual,
+		SPVFuncImplReflectScalar,
+		SPVFuncImplRefractScalar,
+		SPVFuncImplFaceForwardScalar,
+		SPVFuncImplChromaReconstructNearest2Plane,
+		SPVFuncImplChromaReconstructNearest3Plane,
+		SPVFuncImplChromaReconstructLinear422CositedEven2Plane,
+		SPVFuncImplChromaReconstructLinear422CositedEven3Plane,
+		SPVFuncImplChromaReconstructLinear422Midpoint2Plane,
+		SPVFuncImplChromaReconstructLinear422Midpoint3Plane,
+		SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane,
+		SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane,
+		SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane,
+		SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane,
+		SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane,
+		SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane,
+		SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane,
+		SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane,
+		SPVFuncImplExpandITUFullRange,
+		SPVFuncImplExpandITUNarrowRange,
+		SPVFuncImplConvertYCbCrBT709,
+		SPVFuncImplConvertYCbCrBT601,
+		SPVFuncImplConvertYCbCrBT2020,
+		SPVFuncImplDynamicImageSampler,
+
 		SPVFuncImplArrayCopyMultidimMax = 6
 	};
 
@@ -351,30 +535,37 @@ protected:
 	void emit_instruction(const Instruction &instr) override;
 	void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                  uint32_t count) override;
+	void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op,
+	                                           const uint32_t *args, uint32_t count) override;
 	void emit_header() override;
 	void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
 	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
+	void emit_subgroup_op(const Instruction &i) override;
+	std::string to_texture_op(const Instruction &i, bool *forward,
+	                          SmallVector<uint32_t> &inherited_expressions) override;
 	void emit_fixup() override;
 	std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
 	                             const std::string &qualifier = "");
 	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
 	                        const std::string &qualifier = "", uint32_t base_offset = 0) override;
+	void emit_struct_padding_target(const SPIRType &type) override;
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string sampler_type(const SPIRType &type);
 	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
-	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override;
-	std::string to_func_call_arg(uint32_t id) override;
+	std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override;
 	std::string to_name(uint32_t id, bool allow_alias = true) const override;
-	std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
-	                             bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref,
-	                             uint32_t lod) override;
-	std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
+	std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
+	                             bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod,
+	                             uint32_t minlod) override;
+	std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
 	                             uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x,
 	                             uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias,
-	                             uint32_t comp, uint32_t sample, bool *p_forward) override;
+	                             uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override;
 	std::string to_initializer_expression(const SPIRVariable &var) override;
-	std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id) override;
+	std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
+	                                   bool is_packed, bool row_major) override;
+
 	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
 	bool skip_argument(uint32_t id) const override;
 	std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override;
@@ -385,7 +576,8 @@ protected:
 	bool is_patch_block(const SPIRType &type);
 	bool is_non_native_row_major_matrix(uint32_t id) override;
 	bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
-	std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override;
+	std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
+	                                     bool is_packed) override;
 
 	void preprocess_op_codes();
 	void localize_global_variables();
@@ -426,7 +618,7 @@ protected:
 	void emit_specialization_constants_and_structs();
 	void emit_interface_block(uint32_t ib_var_id);
 	bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
-	void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows);
+
 	void fix_up_shader_inputs_outputs();
 
 	std::string func_type_decl(SPIRType &type);
@@ -439,21 +631,43 @@ protected:
 	std::string ensure_valid_name(std::string name, std::string pfx);
 	std::string to_sampler_expression(uint32_t id);
 	std::string to_swizzle_expression(uint32_t id);
+	std::string to_buffer_size_expression(uint32_t id);
 	std::string builtin_qualifier(spv::BuiltIn builtin);
-	std::string builtin_type_decl(spv::BuiltIn builtin);
+	std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0);
 	std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma);
 	std::string member_attribute_qualifier(const SPIRType &type, uint32_t index);
 	std::string argument_decl(const SPIRFunction::Parameter &arg);
 	std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp);
-	uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype);
+	uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0);
 	uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);
-	size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const;
+
+	// MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output.
+	// These values can change depending on various extended decorations which control packing rules.
+	// We need to make these rules match up with SPIR-V declared rules.
+	uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const;
+	uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
+	uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
+	uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const;
+
+	uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const;
+	uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const;
+	uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const;
+	uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const;
+
+	const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const;
+
+	uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false,
+	                                      bool ignore_padding = false) const;
+
 	std::string to_component_argument(uint32_t id);
-	void align_struct(SPIRType &ib_type);
-	bool is_member_packable(SPIRType &ib_type, uint32_t index);
-	MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
+	void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs);
+	void mark_scalar_layout_structs(const SPIRType &ib_type);
+	void mark_struct_members_packed(const SPIRType &type);
+	void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index);
+	bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const;
 	std::string get_argument_address_space(const SPIRVariable &argument);
-	std::string get_type_address_space(const SPIRType &type, uint32_t id);
+	std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false);
+	const char *to_restrict(uint32_t id, bool space = true);
 	SPIRType &get_stage_in_struct_type();
 	SPIRType &get_stage_out_struct_type();
 	SPIRType &get_patch_stage_in_struct_type();
@@ -466,8 +680,10 @@ protected:
 	void add_pragma_line(const std::string &line);
 	void add_typedef_line(const std::string &line);
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
-	void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override;
+	void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
+	                     spv::StorageClass rhs_storage) override;
 	void build_implicit_builtins();
+	uint32_t build_constant_uint_array_pointer();
 	void emit_entry_point_declarations() override;
 	uint32_t builtin_frag_coord_id = 0;
 	uint32_t builtin_sample_id_id = 0;
@@ -475,9 +691,17 @@ protected:
 	uint32_t builtin_base_vertex_id = 0;
 	uint32_t builtin_instance_idx_id = 0;
 	uint32_t builtin_base_instance_id = 0;
+	uint32_t builtin_view_idx_id = 0;
+	uint32_t builtin_layer_id = 0;
 	uint32_t builtin_invocation_id_id = 0;
 	uint32_t builtin_primitive_id_id = 0;
-	uint32_t aux_buffer_id = 0;
+	uint32_t builtin_subgroup_invocation_id_id = 0;
+	uint32_t builtin_subgroup_size_id = 0;
+	uint32_t builtin_dispatch_base_id = 0;
+	uint32_t swizzle_buffer_id = 0;
+	uint32_t buffer_size_buffer_id = 0;
+	uint32_t view_mask_buffer_id = 0;
+	uint32_t dynamic_offsets_buffer_id = 0;
 
 	void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
 	void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
@@ -488,36 +712,65 @@ protected:
 	bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
 	bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);
 
+	void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);
+
+	std::string convert_to_f32(const std::string &expr, uint32_t components);
+
 	Options msl_options;
 	std::set<SPVFuncImpl> spv_function_implementations;
 	std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location;
 	std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_builtin;
 	std::unordered_set<uint32_t> vtx_attrs_in_use;
 	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
-	std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
 	std::set<std::string> pragma_lines;
 	std::set<std::string> typedef_lines;
 	SmallVector<uint32_t> vars_needing_early_declaration;
 
-	SmallVector<std::pair<MSLResourceBinding, bool>> resource_bindings;
+	struct SetBindingPair
+	{
+		uint32_t desc_set;
+		uint32_t binding;
+		bool operator==(const SetBindingPair &other) const;
+		bool operator<(const SetBindingPair &other) const;
+	};
+
+	struct StageSetBinding
+	{
+		spv::ExecutionModel model;
+		uint32_t desc_set;
+		uint32_t binding;
+		bool operator==(const StageSetBinding &other) const;
+	};
+
+	struct InternalHasher
+	{
+		size_t operator()(const SetBindingPair &value) const;
+		size_t operator()(const StageSetBinding &value) const;
+	};
+
+	std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
+
 	uint32_t next_metal_resource_index_buffer = 0;
 	uint32_t next_metal_resource_index_texture = 0;
 	uint32_t next_metal_resource_index_sampler = 0;
-
-	uint32_t stage_in_var_id = 0;
-	uint32_t stage_out_var_id = 0;
-	uint32_t patch_stage_in_var_id = 0;
-	uint32_t patch_stage_out_var_id = 0;
-	uint32_t stage_in_ptr_var_id = 0;
-	uint32_t stage_out_ptr_var_id = 0;
+	// Intentionally uninitialized, works around MSVC 2013 bug.
+	uint32_t next_metal_resource_ids[kMaxArgumentBuffers];
+
+	VariableID stage_in_var_id = 0;
+	VariableID stage_out_var_id = 0;
+	VariableID patch_stage_in_var_id = 0;
+	VariableID patch_stage_out_var_id = 0;
+	VariableID stage_in_ptr_var_id = 0;
+	VariableID stage_out_ptr_var_id = 0;
 	bool has_sampled_images = false;
 	bool needs_vertex_idx_arg = false;
 	bool needs_instance_idx_arg = false;
 	bool is_rasterization_disabled = false;
 	bool capture_output_to_buffer = false;
-	bool needs_aux_buffer_def = false;
-	bool used_aux_buffer = false;
+	bool needs_swizzle_buffer_def = false;
+	bool used_swizzle_buffer = false;
 	bool added_builtin_tess_level = false;
+	bool needs_subgroup_invocation_id = false;
 	std::string qual_pos_var_name;
 	std::string stage_in_var_name = "in";
 	std::string stage_out_var_name = "out";
@@ -525,17 +778,29 @@ protected:
 	std::string patch_stage_out_var_name = "patchOut";
 	std::string sampler_name_suffix = "Smplr";
 	std::string swizzle_name_suffix = "Swzl";
+	std::string buffer_size_name_suffix = "BufferSize";
+	std::string plane_name_suffix = "Plane";
 	std::string input_wg_var_name = "gl_in";
 	std::string output_buffer_var_name = "spvOut";
 	std::string patch_output_buffer_var_name = "spvPatchOut";
 	std::string tess_factor_buffer_var_name = "spvTessLevel";
 	spv::Op previous_instruction_opcode = spv::OpNop;
 
-	std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers;
+	// Must be ordered since declaration is in a specific order.
+	std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id;
+	std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding;
+	const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const;
+
+	std::unordered_set<uint32_t> buffers_requiring_array_length;
 	SmallVector<uint32_t> buffer_arrays;
 
+	// Must be ordered since array is in a specific order.
+	std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;
+
 	uint32_t argument_buffer_ids[kMaxArgumentBuffers];
 	uint32_t argument_buffer_discrete_mask = 0;
+	uint32_t argument_buffer_device_storage_mask = 0;
+
 	void analyze_argument_buffers();
 	bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
 
@@ -544,6 +809,8 @@ protected:
 
 	bool suppress_missing_prototypes = false;
 
+	void add_spv_func_and_recompile(SPVFuncImpl spv_func);
+
 	// OpcodeHandler that handles several MSL preprocessing operations.
 	struct OpCodePreprocessor : OpcodeHandler
 	{
@@ -561,6 +828,7 @@ protected:
 		bool suppress_missing_prototypes = false;
 		bool uses_atomics = false;
 		bool uses_resource_write = false;
+		bool needs_subgroup_invocation_id = false;
 	};
 
 	// OpcodeHandler that scans for uses of sampled images
diff --git a/src/3rdparty/SPIRV-Cross/spirv_parser.cpp b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp
index 1c0a830..08dcff9 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_parser.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp
@@ -60,6 +60,7 @@ static bool is_valid_spirv_version(uint32_t version)
 	case 0x10200: // SPIR-V 1.2
 	case 0x10300: // SPIR-V 1.3
 	case 0x10400: // SPIR-V 1.4
+	case 0x10500: // SPIR-V 1.5
 		return true;
 
 	default:
@@ -162,12 +163,15 @@ void Parser::parse(const Instruction &instruction)
 	case OpSourceContinued:
 	case OpSourceExtension:
 	case OpNop:
-	case OpLine:
-	case OpNoLine:
-	case OpString:
 	case OpModuleProcessed:
 		break;
 
+	case OpString:
+	{
+		set<SPIRString>(ops[0], extract_string(ir.spirv, instruction.offset + 1));
+		break;
+	}
+
 	case OpMemoryModel:
 		ir.addressing_model = static_cast<AddressingModel>(ops[0]);
 		ir.memory_model = static_cast<MemoryModel>(ops[1]);
@@ -240,6 +244,8 @@ void Parser::parse(const Instruction &instruction)
 		auto ext = extract_string(ir.spirv, instruction.offset + 1);
 		if (ext == "GLSL.std.450")
 			set<SPIRExtension>(id, SPIRExtension::GLSL);
+		else if (ext == "DebugInfo")
+			set<SPIRExtension>(id, SPIRExtension::SPV_debug_info);
 		else if (ext == "SPV_AMD_shader_ballot")
 			set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot);
 		else if (ext == "SPV_AMD_shader_explicit_vertex_parameter")
@@ -256,6 +262,14 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpExtInst:
+	{
+		// The SPIR-V debug information extended instructions might come at global scope.
+		if (current_block)
+			current_block->ops.push_back(instruction);
+		break;
+	}
+
 	case OpEntryPoint:
 	{
 		auto itr =
@@ -265,7 +279,9 @@ void Parser::parse(const Instruction &instruction)
 
 		// Strings need nul-terminator and consume the whole word.
 		uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2);
-		e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length);
+
+		for (uint32_t i = strlen_words + 2; i < instruction.length; i++)
+			e.interface_variables.push_back(ops[i]);
 
 		// Set the name of the entry point in case OpName is not provided later.
 		ir.set_name(ops[1], e.name);
@@ -556,10 +572,6 @@ void Parser::parse(const Instruction &instruction)
 		type.image.sampled = ops[6];
 		type.image.format = static_cast<ImageFormat>(ops[7]);
 		type.image.access = (length >= 9) ? static_cast<AccessQualifier>(ops[8]) : AccessQualifierMax;
-
-		if (type.image.sampled == 0)
-			SPIRV_CROSS_THROW("OpTypeImage Sampled parameter must not be zero.");
-
 		break;
 	}
 
@@ -649,7 +661,7 @@ void Parser::parse(const Instruction &instruction)
 				}
 			}
 
-			if (type.type_alias == 0)
+			if (type.type_alias == TypeID(0))
 				global_struct_cache.push_back(id);
 		}
 		break;
@@ -999,12 +1011,12 @@ void Parser::parse(const Instruction &instruction)
 		ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT;
 		ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT;
 
-		ir.continue_block_to_loop_header[current_block->continue_block] = current_block->self;
+		ir.continue_block_to_loop_header[current_block->continue_block] = BlockID(current_block->self);
 
 		// Don't add loop headers to continue blocks,
 		// which would make it impossible branch into the loop header since
 		// they are treated as continues.
-		if (current_block->continue_block != current_block->self)
+		if (current_block->continue_block != BlockID(current_block->self))
 			ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT;
 
 		if (length >= 3)
@@ -1030,6 +1042,37 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpLine:
+	{
+		// OpLine might come at global scope, but we don't care about those since they will not be declared in any
+		// meaningful correct order.
+		// Ignore all OpLine directives which live outside a function.
+		if (current_block)
+			current_block->ops.push_back(instruction);
+
+		// Line directives may arrive before first OpLabel.
+		// Treat this as the line of the function declaration,
+		// so warnings for arguments can propagate properly.
+		if (current_function)
+		{
+			// Store the first one we find and emit it before creating the function prototype.
+			if (current_function->entry_line.file_id == 0)
+			{
+				current_function->entry_line.file_id = ops[0];
+				current_function->entry_line.line_literal = ops[1];
+			}
+		}
+		break;
+	}
+
+	case OpNoLine:
+	{
+		// OpNoLine might come at global scope.
+		if (current_block)
+			current_block->ops.push_back(instruction);
+		break;
+	}
+
 	// Actual opcodes.
 	default:
 	{
diff --git a/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp
index 0b2c585..1e8f615 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp
@@ -61,6 +61,7 @@ public:
 	void end_json_array();
 	void emit_json_array_value(const std::string &value);
 	void emit_json_array_value(uint32_t value);
+	void emit_json_array_value(bool value);
 
 	std::string str() const
 	{
@@ -158,6 +159,16 @@ void Stream::emit_json_array_value(uint32_t value)
 	stack.top().second = true;
 }
 
+void Stream::emit_json_array_value(bool value) // NOTE(review): a `const char *` argument now converts to bool in preference to std::string - confirm no caller passes a string literal
+{ // Appends `value` as the text true/false to the JSON array at the top of the state stack.
+	if (stack.empty() || stack.top().first != Type::Array) // only legal while the innermost open scope is an array
+		SPIRV_CROSS_THROW("Invalid JSON state");
+	if (stack.top().second) // something was already emitted in this array, so separate with a comma
+		statement_inner(",\n");
+	statement_no_return(value ? "true" : "false"); // unquoted literal text, not a JSON string
+	stack.top().second = true; // mark the array as non-empty for the next element
+}
+
 void Stream::begin_json_object()
 {
 	if (!stack.empty() && stack.top().second)
@@ -256,6 +267,8 @@ string CompilerReflection::compile()
 	json_stream = std::make_shared<simple_json::Stream>();
 	json_stream->set_current_locale_radix_character(current_locale_radix_character);
 	json_stream->begin_json_object();
+	fixup_type_alias();
+	reorder_type_alias();
 	emit_entry_points();
 	emit_types();
 	emit_resources();
@@ -283,7 +296,7 @@ void CompilerReflection::emit_type(const SPIRType &type, bool &emitted_open_tag)
 {
 	auto name = type_to_glsl(type);
 
-	if (type.type_alias != 0)
+	if (type.type_alias != TypeID(0))
 		return;
 
 	if (!emitted_open_tag)
@@ -422,6 +435,28 @@ void CompilerReflection::emit_entry_points()
 			json_stream->begin_json_object();
 			json_stream->emit_json_key_value("name", e.name);
 			json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model));
+			if (e.execution_model == ExecutionModelGLCompute)
+			{
+				const auto &spv_entry = get_entry_point(e.name, e.execution_model);
+
+				SpecializationConstant spec_x, spec_y, spec_z;
+				get_work_group_size_specialization_constants(spec_x, spec_y, spec_z);
+
+				json_stream->emit_json_key_array("workgroup_size");
+				json_stream->emit_json_array_value(spec_x.id != ID(0) ? spec_x.constant_id :
+				                                                        spv_entry.workgroup_size.x);
+				json_stream->emit_json_array_value(spec_y.id != ID(0) ? spec_y.constant_id :
+				                                                        spv_entry.workgroup_size.y);
+				json_stream->emit_json_array_value(spec_z.id != ID(0) ? spec_z.constant_id :
+				                                                        spv_entry.workgroup_size.z);
+				json_stream->end_json_array();
+
+				json_stream->emit_json_key_array("workgroup_size_is_spec_constant_id");
+				json_stream->emit_json_array_value(spec_x.id != ID(0));
+				json_stream->emit_json_array_value(spec_y.id != ID(0));
+				json_stream->emit_json_array_value(spec_z.id != ID(0));
+				json_stream->end_json_array();
+			}
 			json_stream->end_json_object();
 		}
 		json_stream->end_json_array();
@@ -466,7 +501,7 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resou
 		bool is_block = get_decoration_bitset(type.self).get(DecorationBlock) ||
 		                get_decoration_bitset(type.self).get(DecorationBufferBlock);
 
-		uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id;
+		ID fallback_id = !is_push_constant && is_block ? ID(res.base_type_id) : ID(res.id);
 
 		json_stream->begin_json_object();
author	Laszlo Agocs <laszlo.agocs@qt.io>	2019-10-21 14:09:05 +0200
committer	Laszlo Agocs <laszlo.agocs@qt.io>	2019-10-23 10:29:49 +0000
commit	53fc739e3d530a70e5371a08d001bacabc0233de (patch)
tree	836910be948b6d7702b6944fcf0b7947f576d4b4
parent	3ed14d7b0d539f97f2d68c83cc02d6509b24aea7 (diff)