Fixed clang-cl compat for ARM64 · microsoft/DirectXMath@0e6ccda (original) (raw)
`@@ -38,7 +38,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat
`
38
38
` } while (++ElementIndex < 4);
`
39
39
`return Result;
`
40
40
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
41
``
`-
float fScale = 1.0f / (float)(1U << DivExponent);
`
``
41
`+
float fScale = 1.0f / static_cast<float>(1U << DivExponent);
`
42
42
`float32x4_t vResult = vcvtq_f32_s32(vreinterpretq_s32_f32(VInt));
`
43
43
`return vmulq_n_f32(vResult, fScale);
`
44
44
`#else // XM_SSE_INTRINSICS
`
`@@ -85,7 +85,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt
`
85
85
` } while (++ElementIndex < 4);
`
86
86
`return Result;
`
87
87
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
88
``
`-
float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
`
``
88
`+
float32x4_t vResult = vmulq_n_f32(VFloat, static_cast<float>(1U << MulExponent));
`
89
89
`// In case of positive overflow, detect it
`
90
90
`uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxInt);
`
91
91
`// Float to int conversion
`
`@@ -128,7 +128,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat
`
128
128
` } while (++ElementIndex < 4);
`
129
129
`return Result;
`
130
130
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
131
``
`-
float fScale = 1.0f / (float)(1U << DivExponent);
`
``
131
`+
float fScale = 1.0f / static_cast<float>(1U << DivExponent);
`
132
132
`float32x4_t vResult = vcvtq_f32_u32(vreinterpretq_u32_f32(VUInt));
`
133
133
`return vmulq_n_f32(vResult, fScale);
`
134
134
`#else // XM_SSE_INTRINSICS
`
`@@ -185,7 +185,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt
`
185
185
` } while (++ElementIndex < 4);
`
186
186
`return Result;
`
187
187
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
188
``
`-
float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));
`
``
188
`+
float32x4_t vResult = vmulq_n_f32(VFloat, static_cast<float>(1U << MulExponent));
`
189
189
`// In case of overflow, detect it
`
190
190
`uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxUInt);
`
191
191
`// Float to int conversion
`
`@@ -301,7 +301,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt2A(const uint32_t* pSource) noexcept
`
301
301
` V.vector4_u32[3] = 0;
`
302
302
`return V;
`
303
303
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
304
``
`-
#ifdef _MSC_VER
`
``
304
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
305
305
`uint32x2_t x = vld1_u32_ex(pSource, 64);
`
306
306
`#else
`
307
307
`uint32x2_t x = vld1_u32(pSource);
`
`@@ -348,7 +348,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat2A(const XMFLOAT2A* pSource) noexcept
`
348
348
` V.vector4_f32[3] = 0.f;
`
349
349
`return V;
`
350
350
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
351
``
`-
#ifdef _MSC_VER
`
``
351
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
352
352
`float32x2_t x = vld1_f32_ex(reinterpret_cast<const float*>(pSource), 64);
`
353
353
`#else
`
354
354
`float32x2_t x = vld1_f32(reinterpret_cast<const float*>(pSource));
`
`@@ -461,7 +461,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt3A(const uint32_t* pSource) noexcept
`
461
461
`return V;
`
462
462
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
463
463
`// Reads an extra integer which is zero'd
`
464
``
`-
#ifdef _MSC_VER
`
``
464
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
465
465
`uint32x4_t V = vld1q_u32_ex(pSource, 128);
`
466
466
`#else
`
467
467
`uint32x4_t V = vld1q_u32(pSource);
`
`@@ -521,7 +521,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept
`
521
521
`return V;
`
522
522
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
523
523
`// Reads an extra float which is zero'd
`
524
``
`-
#ifdef _MSC_VER
`
``
524
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
525
525
`float32x4_t V = vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
`
526
526
`#else
`
527
527
`float32x4_t V = vld1q_f32(reinterpret_cast<const float*>(pSource));
`
`@@ -635,7 +635,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt4A(const uint32_t* pSource) noexcept
`
635
635
` V.vector4_u32[3] = pSource[3];
`
636
636
`return V;
`
637
637
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
638
``
`-
#ifdef _MSC_VER
`
``
638
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
639
639
`return vld1q_u32_ex(pSource, 128);
`
640
640
`#else
`
641
641
`return vreinterpretq_f32_u32(vld1q_u32(pSource));
`
`@@ -679,7 +679,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat4A(const XMFLOAT4A* pSource) noexcept
`
679
679
` V.vector4_f32[3] = pSource->w;
`
680
680
`return V;
`
681
681
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
682
``
`-
#ifdef _MSC_VER
`
``
682
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
683
683
`return vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);
`
684
684
`#else
`
685
685
`return vld1q_f32(reinterpret_cast<const float*>(pSource));
`
`@@ -915,7 +915,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept
`
915
915
`return M;
`
916
916
``
917
917
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
918
``
`-
#ifdef _MSC_VER
`
``
918
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
919
919
`float32x4_t v0 = vld1q_f32_ex(&pSource->m[0][0], 128);
`
920
920
`float32x4_t v1 = vld1q_f32_ex(&pSource->m[1][1], 128);
`
921
921
`float32x4_t v2 = vld1q_f32_ex(&pSource->m[2][2], 128);
`
`@@ -1077,7 +1077,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat3x4A(const XMFLOAT3X4A* pSource) noexcept
`
1077
1077
`return M;
`
1078
1078
``
1079
1079
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1080
``
`-
#ifdef _MSC_VER
`
``
1080
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1081
1081
`float32x2x4_t vTemp0 = vld4_f32_ex(&pSource->_11, 128);
`
1082
1082
`float32x4_t vTemp1 = vld1q_f32_ex(&pSource->_31, 128);
`
1083
1083
`#else
`
`@@ -1208,7 +1208,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A(const XMFLOAT4X4A* pSource) noexcept
`
1208
1208
``
1209
1209
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1210
1210
` XMMATRIX M;
`
1211
``
`-
#ifdef _MSC_VER
`
``
1211
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1212
1212
` M.r[0] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_11), 128);
`
1213
1213
` M.r[1] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_21), 128);
`
1214
1214
` M.r[2] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_31), 128);
`
`@@ -1305,7 +1305,7 @@ inline void XM_CALLCONV XMStoreInt2A
`
1305
1305
` pDestination[1] = V.vector4_u32[1];
`
1306
1306
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1307
1307
`uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
`
1308
``
`-
#ifdef _MSC_VER
`
``
1308
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1309
1309
`vst1_u32_ex(pDestination, VL, 64);
`
1310
1310
`#else
`
1311
1311
`vst1_u32(pDestination, VL);
`
`@@ -1350,7 +1350,7 @@ inline void XM_CALLCONV XMStoreFloat2A
`
1350
1350
` pDestination->y = V.vector4_f32[1];
`
1351
1351
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1352
1352
`float32x2_t VL = vget_low_f32(V);
`
1353
``
`-
#ifdef _MSC_VER
`
``
1353
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1354
1354
`vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
`
1355
1355
`#else
`
1356
1356
`vst1_f32(reinterpret_cast<float*>(pDestination), VL);
`
`@@ -1469,7 +1469,7 @@ inline void XM_CALLCONV XMStoreInt3A
`
1469
1469
` pDestination[2] = V.vector4_u32[2];
`
1470
1470
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1471
1471
`uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));
`
1472
``
`-
#ifdef _MSC_VER
`
``
1472
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1473
1473
`vst1_u32_ex(pDestination, VL, 64);
`
1474
1474
`#else
`
1475
1475
`vst1_u32(pDestination, VL);
`
`@@ -1526,7 +1526,7 @@ inline void XM_CALLCONV XMStoreFloat3A
`
1526
1526
` pDestination->z = V.vector4_f32[2];
`
1527
1527
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1528
1528
`float32x2_t VL = vget_low_f32(V);
`
1529
``
`-
#ifdef _MSC_VER
`
``
1529
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1530
1530
`vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);
`
1531
1531
`#else
`
1532
1532
`vst1_f32(reinterpret_cast<float*>(pDestination), VL);
`
`@@ -1656,7 +1656,7 @@ inline void XM_CALLCONV XMStoreInt4A
`
1656
1656
` pDestination[2] = V.vector4_u32[2];
`
1657
1657
` pDestination[3] = V.vector4_u32[3];
`
1658
1658
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1659
``
`-
#ifdef _MSC_VER
`
``
1659
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1660
1660
`vst1q_u32_ex(pDestination, V, 128);
`
1661
1661
`#else
`
1662
1662
`vst1q_u32(pDestination, vreinterpretq_u32_f32(V));
`
`@@ -1703,7 +1703,7 @@ inline void XM_CALLCONV XMStoreFloat4A
`
1703
1703
` pDestination->z = V.vector4_f32[2];
`
1704
1704
` pDestination->w = V.vector4_f32[3];
`
1705
1705
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1706
``
`-
#ifdef _MSC_VER
`
``
1706
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1707
1707
`vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128);
`
1708
1708
`#else
`
1709
1709
`vst1q_f32(reinterpret_cast<float*>(pDestination), V);
`
`@@ -1913,7 +1913,7 @@ inline void XM_CALLCONV XMStoreFloat4x3A
`
1913
1913
` pDestination->m[3][2] = M.r[3].vector4_f32[2];
`
1914
1914
``
1915
1915
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
1916
``
`-
#ifdef _MSC_VER
`
``
1916
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
1917
1917
`float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1);
`
1918
1918
`float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1);
`
1919
1919
`vst1q_f32_ex(&pDestination->m[0][0], T2, 128);
`
`@@ -2057,7 +2057,7 @@ inline void XM_CALLCONV XMStoreFloat3x4A
`
2057
2057
`float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
`
2058
2058
`float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
`
2059
2059
``
2060
``
`-
#ifdef _MSC_VER
`
``
2060
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
2061
2061
`vst1q_f32_ex(&pDestination->m[0][0], T0.val[0], 128);
`
2062
2062
`vst1q_f32_ex(&pDestination->m[1][0], T0.val[1], 128);
`
2063
2063
`vst1q_f32_ex(&pDestination->m[2][0], T1.val[0], 128);
`
`@@ -2166,7 +2166,7 @@ inline void XM_CALLCONV XMStoreFloat4x4A
`
2166
2166
` pDestination->m[3][3] = M.r[3].vector4_f32[3];
`
2167
2167
``
2168
2168
`#elif defined(XM_ARM_NEON_INTRINSICS)
`
2169
``
`-
#ifdef _MSC_VER
`
``
2169
`+
#if defined(_MSC_VER) && !defined(__clang__)
`
2170
2170
`vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.r[0], 128);
`
2171
2171
`vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_21), M.r[1], 128);
`
2172
2172
`vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_31), M.r[2], 128);
`