#pragma once
#if !defined(_M_IX86) && !defined(_M_X64)
#error This header is specific to X86 and X64 targets
#endif
#ifndef _INCLUDED_MM2
#define _INCLUDED_MM2
#ifndef __midl
#if defined (_M_CEE_PURE)
#error ERROR: XMM intrinsics not supported in the pure mode!
#else /* defined (_M_CEE_PURE) */
#ifndef _MMINTRIN_H_INCLUDED
#include <mmintrin.h>
#endif /* _MMINTRIN_H_INCLUDED */
#ifdef _MM2_FUNCTIONALITY
/* support old notation */
#ifndef _MM_FUNCTIONALITY
#define _MM_FUNCTIONALITY
#endif /* _MM_FUNCTIONALITY */
#endif /* _MM2_FUNCTIONALITY */
#ifdef __ICL
#ifdef _MM_FUNCTIONALITY
#include "xmm_func.h"
#else /* _MM_FUNCTIONALITY */
/* using real intrinsics */
typedef long long __m128;
#endif /* _MM_FUNCTIONALITY */
#else /* __ICL */
/*
 * __m128: 128-bit, 16-byte-aligned value used by the XMM intrinsics.
 * The union exposes the same 16 bytes as four floats or as signed/unsigned
 * integer lanes of 8, 16, 32 or 64 bits. intrin_type marks it as a type the
 * compiler handles specially for intrinsics.
 */
typedef union __declspec(intrin_type) __declspec(align(16)) __m128 {
    float               m128_f32[4];    /* four single-precision lanes */
    unsigned __int64    m128_u64[2];
    __int8              m128_i8[16];
    __int16             m128_i16[8];
    __int32             m128_i32[4];
    __int64             m128_i64[2];
    unsigned __int8     m128_u8[16];
    unsigned __int16    m128_u16[8];
    unsigned __int32    m128_u32[4];
} __m128;
#if !defined _VCRT_BUILD && !defined _INC_MALLOC
#include <malloc.h>
#endif /* !defined _VCRT_BUILD && !defined _INC_MALLOC */
#endif /* __ICL */
/*
 * Packs four selectors into one integer immediate: fp0 is used as-is,
 * fp1 is shifted left by 2, fp2 by 4 and fp3 by 6, and the results are ORed.
 * The arguments are not range-checked or masked.
 */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
    (((fp0)) | ((fp1) << 2) | ((fp2) << 4) | ((fp3) << 6))
/*
 * _MM_TRANSPOSE4_PS(row0, row1, row2, row3)
 *
 * Transposes, in place, the 4x4 matrix whose rows are the four __m128
 * arguments. Two rounds of _mm_shuffle_ps are used: the first round
 * (0x44 / 0xEE) gathers the low and high halves of each row pair into
 * temporaries, the second (0x88 / 0xDD) picks the even and odd elements of
 * those temporaries and writes them back to the rows.
 *
 * Usage notes:
 *  - every argument must be a modifiable lvalue; each is evaluated more
 *    than once, so arguments must not have side effects;
 *  - the expansion is a bare { ... } block, not do { ... } while (0), so
 *    "if (c) _MM_TRANSPOSE4_PS(a, b, c, d); else ..." does not parse;
 *  - the temporaries _Tmp0.._Tmp3 shadow any caller variables of the same
 *    name passed as arguments.
 */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) { \
__m128 _Tmp3, _Tmp2, _Tmp1, _Tmp0; \
\
_Tmp0 = _mm_shuffle_ps((row0), (row1), 0x44); \
_Tmp2 = _mm_shuffle_ps((row0), (row1), 0xEE); \
_Tmp1 = _mm_shuffle_ps((row2), (row3), 0x44); \
_Tmp3 = _mm_shuffle_ps((row2), (row3), 0xEE); \
\
(row0) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0x88); \
(row1) = _mm_shuffle_ps(_Tmp0, _Tmp1, 0xDD); \
(row2) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0x88); \
(row3) = _mm_shuffle_ps(_Tmp2, _Tmp3, 0xDD); \
}
/* Hint selector values (presumably the int argument of _mm_prefetch -- confirm). */
#define _MM_HINT_NTA 0
#define _MM_HINT_T0 1
#define _MM_HINT_T1 2
#define _MM_HINT_T2 3
#define _MM_HINT_ENTA 4
/* 16-byte alignment specifier; _VCRT_ALIGN is defined outside this header. */
#define _MM_ALIGN16 _VCRT_ALIGN(16)
/*
 * Bit fields of the control/status word read by _mm_getcsr() and written by
 * _mm_setcsr(). Each *_MASK value is the OR of the individual bits in its group.
 */
/* Exception state flags (bits 0-5). */
#define _MM_EXCEPT_MASK 0x003f
#define _MM_EXCEPT_INVALID 0x0001
#define _MM_EXCEPT_DENORM 0x0002
#define _MM_EXCEPT_DIV_ZERO 0x0004
#define _MM_EXCEPT_OVERFLOW 0x0008
#define _MM_EXCEPT_UNDERFLOW 0x0010
#define _MM_EXCEPT_INEXACT 0x0020
/* Exception mask bits (bits 7-12), one per exception flag above. */
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_INVALID 0x0080
#define _MM_MASK_DENORM 0x0100
#define _MM_MASK_DIV_ZERO 0x0200
#define _MM_MASK_OVERFLOW 0x0400
#define _MM_MASK_UNDERFLOW 0x0800
#define _MM_MASK_INEXACT 0x1000
/* Rounding mode field (bits 13-14); the four values are mutually exclusive. */
#define _MM_ROUND_MASK 0x6000
#define _MM_ROUND_NEAREST 0x0000
#define _MM_ROUND_DOWN 0x2000
#define _MM_ROUND_UP 0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000
/* Flush-to-zero control (bit 15). */
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
/*
 * Field accessors for the control/status word.
 *
 * Each getter returns _mm_getcsr() restricted to one field mask.
 * Each setter reads the word with _mm_getcsr(), clears the bits of the
 * field mask, ORs in the argument and writes the result with _mm_setcsr().
 * The argument is not masked, so bits outside the field are set too if the
 * caller passes them.
 */

/* Exception state flags (_MM_EXCEPT_MASK). */
#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_SET_EXCEPTION_STATE(mask) _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (mask))

/* Exception mask bits (_MM_MASK_MASK). */
#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_SET_EXCEPTION_MASK(mask) _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (mask))

/* Rounding mode (_MM_ROUND_MASK). */
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)
#define _MM_SET_ROUNDING_MODE(mode) _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (mode))

/* Flush-to-zero mode (_MM_FLUSH_ZERO_MASK). */
#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_SET_FLUSH_ZERO_MODE(mode) _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (mode))
#if defined __cplusplus
extern
"C"
{
#endif /* defined __cplusplus */
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
);
extern
(
);
extern
(
);
extern
(
);
extern
(
);
extern
(
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
,
);
extern
int
(
);
extern
int
(
);
extern
(
,
int
);
extern
float
(
);
#if defined(_M_IX86)
/*
 * Conversions between __m128 and the 64-bit __m64 type.
 * Declared for 32-bit x86 only, because they depend on __m64.
 */
extern __m64 _mm_cvt_ps2pi(__m128 _A);
extern __m64 _mm_cvtt_ps2pi(__m128 _A);
extern __m128 _mm_cvt_pi2ps(__m128 _A, __m64 _B);
#endif
#if defined (_M_X64)
/*
 * Support for 64-bit intrinsics.
 * Declared only when targeting x64. These take or return __int64, where
 * the _si32 / _cvt_*si* declarations elsewhere in this header use int.
 */
extern __int64 _mm_cvtss_si64(__m128 _A);
extern __int64 _mm_cvttss_si64(__m128 _A);
extern __m128 _mm_cvtsi64_ss(__m128 _A, __int64 _B);
#endif /* defined (_M_X64) */
extern
(
,
,
unsigned
int
);
extern
(
,
);
extern
(
,
);
extern
(
,
const
*);
extern
(
,
);
extern
(
,
);
extern
void
(
*,
);
extern
(
,
const
*);
extern
void
(
*,
);
extern
int
(
);
#if defined(_M_IX86)
/*
 * Integer (MMX) extensions operating on __m64; 32-bit x86 only.
 * The _mm_*_pi16 / _mm_*_pu8 style names defined later in this header are
 * macro aliases for these.
 */
extern int _m_pextrw(__m64 _A, int _Imm);
extern __m64 _m_pinsrw(__m64 _A, int _B, int _Imm);
extern __m64 _m_pmaxsw(__m64 _A, __m64 _B);
extern __m64 _m_pmaxub(__m64 _A, __m64 _B);
extern __m64 _m_pminsw(__m64 _A, __m64 _B);
extern __m64 _m_pminub(__m64 _A, __m64 _B);
extern int _m_pmovmskb(__m64 _A);
extern __m64 _m_pmulhuw(__m64 _A, __m64 _B);
extern __m64 _m_pshufw(__m64 _A, int _Imm);
extern void _m_maskmovq(__m64 _A, __m64 _Mask, char *_P);
extern __m64 _m_pavgb(__m64 _A, __m64 _B);
extern __m64 _m_pavgw(__m64 _A, __m64 _B);
extern __m64 _m_psadbw(__m64 _A, __m64 _B);
#endif
extern
(
float
);
extern
(
float
);
extern
(
float
,
float
,
float
,
float
);
extern
(
float
,
float
,
float
,
float
);
extern
(
void
);
extern
(
float
const
*
);
extern
(
float
const
*
);
extern
(
float
const
*
);
extern
(
float
const
*
);
extern
(
float
const
*
);
extern
void
(
float
*
,
);
extern
void
(
float
*
,
);
extern
void
(
float
*
,
);
extern
void
(
float
*
,
);
extern
void
(
float
*
,
);
extern
void
(
char
const
*
,
int
);
#if defined(_M_IX86)
extern
void
(
*,
);
#endif
extern
void
(
float
*,
);
extern
(
,
);
extern
void
(
void
);
extern
unsigned
int
(
void
);
extern
void
(
unsigned
int
);
#ifdef __ICL
/*
 * Declared only when __ICL is defined; the non-__ICL branch near the top
 * of this header includes <malloc.h> instead.
 * NOTE(review): presumably _Siz is the byte count and _Al the alignment,
 * and _mm_free releases a pointer obtained from _mm_malloc -- confirm.
 */
extern void* __cdecl _mm_malloc(size_t _Siz, size_t _Al);
extern void __cdecl _mm_free(void *_P);
#endif /* __ICL */
/*
 * Alternate intrinsic names. Each macro maps a _mm_* name onto another name
 * declared in this header, so both spellings resolve to the same function.
 */
#if defined(_M_IX86)
/* Aliases for the __m64-based intrinsics; 32-bit x86 only. */
#define _mm_cvtps_pi32 _mm_cvt_ps2pi
#define _mm_cvttps_pi32 _mm_cvtt_ps2pi
#define _mm_cvtpi32_ps _mm_cvt_pi2ps
#define _mm_extract_pi16 _m_pextrw
#define _mm_insert_pi16 _m_pinsrw
#define _mm_max_pi16 _m_pmaxsw
#define _mm_max_pu8 _m_pmaxub
#define _mm_min_pi16 _m_pminsw
#define _mm_min_pu8 _m_pminub
#define _mm_movemask_pi8 _m_pmovmskb
#define _mm_mulhi_pu16 _m_pmulhuw
#define _mm_shuffle_pi16 _m_pshufw
#define _mm_maskmove_si64 _m_maskmovq
#define _mm_avg_pu8 _m_pavgb
#define _mm_avg_pu16 _m_pavgw
#define _mm_sad_pu8 _m_psadbw
#endif
/* Aliases available on every target this header supports. */
#define _mm_cvtss_si32 _mm_cvt_ss2si
#define _mm_cvttss_si32 _mm_cvtt_ss2si
#define _mm_cvtsi32_ss _mm_cvt_si2ss
#define _mm_set1_ps _mm_set_ps1
#define _mm_load1_ps _mm_load_ps1
#define _mm_store1_ps _mm_store_ps1
#if defined(_M_IX86)
__inline
(
)
{
;
__m64 _Ext_val = _mm_cmpgt_pi16(_mm_setzero_si64(), _A);
_Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
_Tmp
=
(
(),
(
,
_Ext_val
));
return(_mm_cvtpi32_ps(_mm_movelh_ps(_Tmp, _Tmp),
_mm_unpacklo_pi16(_A, _Ext_val)));
}
__inline
(
)
{
;
=
();
_Tmp = _mm_cvtpi32_ps(_mm_setzero_ps(), _mm_unpackhi_pi16(_A, _Ext_val));
_Tmp
=
(
(),
(
,
_Ext_val
));
return(_mm_cvtpi32_ps(_mm_movelh_ps(_Tmp, _Tmp),
_mm_unpacklo_pi16(_A, _Ext_val)));
}
__inline
(
)
{
return _mm_packs_pi32(_mm_cvtps_pi32(_A),
_mm_cvtps_pi32(_mm_movehl_ps(_A, _A)));
}
__inline
(
)
{
__m64 _Ext_val = _mm_cmpgt_pi8(_mm_setzero_si64(), _A);
return _mm_cvtpi16_ps(_mm_unpacklo_pi8(_A, _Ext_val));
}
__inline
(
)
{
return _mm_cvtpu16_ps(_mm_unpacklo_pi8(_A, _mm_setzero_si64()));
}
__inline
(
)
{
return _mm_packs_pi16(_mm_cvtps_pi16(_A), _mm_setzero_si64());
}
__inline
(
,
)
{
return
(
(
(),
),
(
(),
));
}
#endif // _M_IX86
#if defined __cplusplus
};
#endif /* defined __cplusplus */
#endif /* defined (_M_CEE_PURE) */
#endif /* __midl */
#endif /* _INCLUDED_MM2 */