Fixed clang-cl compat for ARM64 · microsoft/DirectXMath@0e6ccda (original) (raw)

`@@ -38,7 +38,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat

38

` } while (++ElementIndex < 4);

39

`return Result;

40

`#elif defined(XM_ARM_NEON_INTRINSICS)

41

float fScale = 1.0f / (float)(1U << DivExponent);

41

float fScale = 1.0f / static_cast<float>(1U << DivExponent);

42

`float32x4_t vResult = vcvtq_f32_s32(vreinterpretq_s32_f32(VInt));

43

`return vmulq_n_f32(vResult, fScale);

44

`#else // XM_SSE_INTRINSICS

`@@ -85,7 +85,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt

85

` } while (++ElementIndex < 4);

86

`return Result;

87

`#elif defined(XM_ARM_NEON_INTRINSICS)

88

float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));

88

float32x4_t vResult = vmulq_n_f32(VFloat, static_cast<float>(1U << MulExponent));

89

`// In case of positive overflow, detect it

90

`uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxInt);

91

`// Float to int conversion

`@@ -128,7 +128,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat

128

` } while (++ElementIndex < 4);

129

`return Result;

130

`#elif defined(XM_ARM_NEON_INTRINSICS)

131

float fScale = 1.0f / (float)(1U << DivExponent);

131

float fScale = 1.0f / static_cast<float>(1U << DivExponent);

132

`float32x4_t vResult = vcvtq_f32_u32(vreinterpretq_u32_f32(VUInt));

133

`return vmulq_n_f32(vResult, fScale);

134

`#else // XM_SSE_INTRINSICS

`@@ -185,7 +185,7 @@ inline XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt

185

` } while (++ElementIndex < 4);

186

`return Result;

187

`#elif defined(XM_ARM_NEON_INTRINSICS)

188

float32x4_t vResult = vmulq_n_f32(VFloat, (float)(1U << MulExponent));

188

float32x4_t vResult = vmulq_n_f32(VFloat, static_cast<float>(1U << MulExponent));

189

`// In case of overflow, detect it

190

`uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxUInt);

191

`// Float to int conversion

`@@ -301,7 +301,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt2A(const uint32_t* pSource) noexcept

301

` V.vector4_u32[3] = 0;

302

`return V;

303

`#elif defined(XM_ARM_NEON_INTRINSICS)

304

#ifdef _MSC_VER

304

#if defined(_MSC_VER) && !defined(__clang__)

305

`uint32x2_t x = vld1_u32_ex(pSource, 64);

306

`#else

307

`uint32x2_t x = vld1_u32(pSource);

`@@ -348,7 +348,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat2A(const XMFLOAT2A* pSource) noexcept

348

` V.vector4_f32[3] = 0.f;

349

`return V;

350

`#elif defined(XM_ARM_NEON_INTRINSICS)

351

#ifdef _MSC_VER

351

#if defined(_MSC_VER) && !defined(__clang__)

352

`float32x2_t x = vld1_f32_ex(reinterpret_cast<const float*>(pSource), 64);

353

`#else

354

`float32x2_t x = vld1_f32(reinterpret_cast<const float*>(pSource));

`@@ -461,7 +461,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt3A(const uint32_t* pSource) noexcept

461

`return V;

462

`#elif defined(XM_ARM_NEON_INTRINSICS)

463

`// Reads an extra integer which is zero'd

464

#ifdef _MSC_VER

464

#if defined(_MSC_VER) && !defined(__clang__)

465

`uint32x4_t V = vld1q_u32_ex(pSource, 128);

466

`#else

467

`uint32x4_t V = vld1q_u32(pSource);

`@@ -521,7 +521,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept

521

`return V;

522

`#elif defined(XM_ARM_NEON_INTRINSICS)

523

`// Reads an extra float which is zero'd

524

#ifdef _MSC_VER

524

#if defined(_MSC_VER) && !defined(__clang__)

525

`float32x4_t V = vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);

526

`#else

527

`float32x4_t V = vld1q_f32(reinterpret_cast<const float*>(pSource));

`@@ -635,7 +635,7 @@ inline XMVECTOR XM_CALLCONV XMLoadInt4A(const uint32_t* pSource) noexcept

635

` V.vector4_u32[3] = pSource[3];

636

`return V;

637

`#elif defined(XM_ARM_NEON_INTRINSICS)

638

#ifdef _MSC_VER

638

#if defined(_MSC_VER) && !defined(__clang__)

639

`return vld1q_u32_ex(pSource, 128);

640

`#else

641

`return vreinterpretq_f32_u32(vld1q_u32(pSource));

`@@ -679,7 +679,7 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat4A(const XMFLOAT4A* pSource) noexcept

679

` V.vector4_f32[3] = pSource->w;

680

`return V;

681

`#elif defined(XM_ARM_NEON_INTRINSICS)

682

#ifdef _MSC_VER

682

#if defined(_MSC_VER) && !defined(__clang__)

683

`return vld1q_f32_ex(reinterpret_cast<const float*>(pSource), 128);

684

`#else

685

`return vld1q_f32(reinterpret_cast<const float*>(pSource));

`@@ -915,7 +915,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept

915

`return M;

916

917

`#elif defined(XM_ARM_NEON_INTRINSICS)

918

#ifdef _MSC_VER

918

#if defined(_MSC_VER) && !defined(__clang__)

919

`float32x4_t v0 = vld1q_f32_ex(&pSource->m[0][0], 128);

920

`float32x4_t v1 = vld1q_f32_ex(&pSource->m[1][1], 128);

921

`float32x4_t v2 = vld1q_f32_ex(&pSource->m[2][2], 128);

`@@ -1077,7 +1077,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat3x4A(const XMFLOAT3X4A* pSource) noexcept

1077

`return M;

1078

1079

`#elif defined(XM_ARM_NEON_INTRINSICS)

1080

#ifdef _MSC_VER

1080

#if defined(_MSC_VER) && !defined(__clang__)

1081

`float32x2x4_t vTemp0 = vld4_f32_ex(&pSource->_11, 128);

1082

`float32x4_t vTemp1 = vld1q_f32_ex(&pSource->_31, 128);

1083

`#else

`@@ -1208,7 +1208,7 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x4A(const XMFLOAT4X4A* pSource) noexcept

1208

1209

`#elif defined(XM_ARM_NEON_INTRINSICS)

1210

` XMMATRIX M;

1211

#ifdef _MSC_VER

1211

#if defined(_MSC_VER) && !defined(__clang__)

1212

` M.r[0] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_11), 128);

1213

` M.r[1] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_21), 128);

1214

` M.r[2] = vld1q_f32_ex(reinterpret_cast<const float*>(&pSource->_31), 128);

`@@ -1305,7 +1305,7 @@ inline void XM_CALLCONV XMStoreInt2A

1305

` pDestination[1] = V.vector4_u32[1];

1306

`#elif defined(XM_ARM_NEON_INTRINSICS)

1307

`uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));

1308

#ifdef _MSC_VER

1308

#if defined(_MSC_VER) && !defined(__clang__)

1309

`vst1_u32_ex(pDestination, VL, 64);

1310

`#else

1311

`vst1_u32(pDestination, VL);

`@@ -1350,7 +1350,7 @@ inline void XM_CALLCONV XMStoreFloat2A

1350

` pDestination->y = V.vector4_f32[1];

1351

`#elif defined(XM_ARM_NEON_INTRINSICS)

1352

`float32x2_t VL = vget_low_f32(V);

1353

#ifdef _MSC_VER

1353

#if defined(_MSC_VER) && !defined(__clang__)

1354

`vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);

1355

`#else

1356

`vst1_f32(reinterpret_cast<float*>(pDestination), VL);

`@@ -1469,7 +1469,7 @@ inline void XM_CALLCONV XMStoreInt3A

1469

` pDestination[2] = V.vector4_u32[2];

1470

`#elif defined(XM_ARM_NEON_INTRINSICS)

1471

`uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V));

1472

#ifdef _MSC_VER

1472

#if defined(_MSC_VER) && !defined(__clang__)

1473

`vst1_u32_ex(pDestination, VL, 64);

1474

`#else

1475

`vst1_u32(pDestination, VL);

`@@ -1526,7 +1526,7 @@ inline void XM_CALLCONV XMStoreFloat3A

1526

` pDestination->z = V.vector4_f32[2];

1527

`#elif defined(XM_ARM_NEON_INTRINSICS)

1528

`float32x2_t VL = vget_low_f32(V);

1529

#ifdef _MSC_VER

1529

#if defined(_MSC_VER) && !defined(__clang__)

1530

`vst1_f32_ex(reinterpret_cast<float*>(pDestination), VL, 64);

1531

`#else

1532

`vst1_f32(reinterpret_cast<float*>(pDestination), VL);

`@@ -1656,7 +1656,7 @@ inline void XM_CALLCONV XMStoreInt4A

1656

` pDestination[2] = V.vector4_u32[2];

1657

` pDestination[3] = V.vector4_u32[3];

1658

`#elif defined(XM_ARM_NEON_INTRINSICS)

1659

#ifdef _MSC_VER

1659

#if defined(_MSC_VER) && !defined(__clang__)

1660

`vst1q_u32_ex(pDestination, V, 128);

1661

`#else

1662

`vst1q_u32(pDestination, vreinterpretq_u32_f32(V));

`@@ -1703,7 +1703,7 @@ inline void XM_CALLCONV XMStoreFloat4A

1703

` pDestination->z = V.vector4_f32[2];

1704

` pDestination->w = V.vector4_f32[3];

1705

`#elif defined(XM_ARM_NEON_INTRINSICS)

1706

#ifdef _MSC_VER

1706

#if defined(_MSC_VER) && !defined(__clang__)

1707

`vst1q_f32_ex(reinterpret_cast<float*>(pDestination), V, 128);

1708

`#else

1709

`vst1q_f32(reinterpret_cast<float*>(pDestination), V);

`@@ -1913,7 +1913,7 @@ inline void XM_CALLCONV XMStoreFloat4x3A

1913

` pDestination->m[3][2] = M.r[3].vector4_f32[2];

1914

1915

`#elif defined(XM_ARM_NEON_INTRINSICS)

1916

#ifdef _MSC_VER

1916

#if defined(_MSC_VER) && !defined(__clang__)

1917

`float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1);

1918

`float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1);

1919

`vst1q_f32_ex(&pDestination->m[0][0], T2, 128);

`@@ -2057,7 +2057,7 @@ inline void XM_CALLCONV XMStoreFloat3x4A

2057

`float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);

2058

`float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);

2059

2060

#ifdef _MSC_VER

2060

#if defined(_MSC_VER) && !defined(__clang__)

2061

`vst1q_f32_ex(&pDestination->m[0][0], T0.val[0], 128);

2062

`vst1q_f32_ex(&pDestination->m[1][0], T0.val[1], 128);

2063

`vst1q_f32_ex(&pDestination->m[2][0], T1.val[0], 128);

`@@ -2166,7 +2166,7 @@ inline void XM_CALLCONV XMStoreFloat4x4A

2166

` pDestination->m[3][3] = M.r[3].vector4_f32[3];

2167

2168

`#elif defined(XM_ARM_NEON_INTRINSICS)

2169

#ifdef _MSC_VER

2169

#if defined(_MSC_VER) && !defined(__clang__)

2170

`vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_11), M.r[0], 128);

2171

`vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_21), M.r[1], 128);

2172

`vst1q_f32_ex(reinterpret_cast<float*>(&pDestination->_31), M.r[2], 128);