/* File Index  Symbol Index */

/*** *** Copyright (C) 1985-2015 Intel Corporation. All rights reserved. *** *** The information and source code contained herein is the exclusive *** property of Intel Corporation and may not be disclosed, examined *** or reproduced in whole or in part without explicit written authorization *** from the company. *** ****/
/*
 * emmintrin.h
 *
 * Principal header file for Willamette New Instruction intrinsics
 *
 * The intrinsics package can be used in 2 ways, based on whether or not
 * _EMM_FUNCTIONALITY is defined; if it is, the C implementation
 * will be used (the "functional intrinsics").
 */
#pragma once
#if !defined(_M_IX86) && !defined(_M_X64)
#error This header is specific to X86 and X64 targets #endif
#ifndef _INCLUDED_EMM
#define _INCLUDED_EMM
#ifndef __midl
#if !defined _M_IX86 && !defined _M_X64
#error This header is specific to X86 and X64 targets #endif
#if defined (_M_CEE_PURE)
#error ERROR: EMM intrinsics not supported in the pure mode! #else /* defined (_M_CEE_PURE) */
/* * the __m128 & __m64 types are required for the intrinsics */
#include <xmmintrin.h>
/*
 * __m128i: union declared with align(16) and used as the operand/result
 * type of the integer prototypes in this header.  All members share the
 * same storage and expose it as arrays of 16, 8, 4 or 2 elements of
 * __int8 / __int16 / __int32 / __int64, in signed (m128i_i*) and
 * unsigned (m128i_u*) form.
 */
typedef union __declspec(intrin_type) __declspec(align(16)) __m128i {
    __int8           m128i_i8[16];
    __int16          m128i_i16[8];
    __int32          m128i_i32[4];
    __int64          m128i_i64[2];
    unsigned __int8  m128i_u8[16];
    unsigned __int16 m128i_u16[8];
    unsigned __int32 m128i_u32[4];
    unsigned __int64 m128i_u64[2];
} __m128i;
/*
 * __m128d: struct declared with align(16) that wraps an array of two
 * doubles (m128d_f64).  It is the operand/result type of the DP
 * prototypes in this header.
 */
typedef struct __declspec(intrin_type) __declspec(align(16)) __m128d {
    double m128d_f64[2];
} __m128d;
/*
 * Macro function for shuffle.
 *
 * Combines two selectors into a single value: x shifted left by one bit,
 * OR-ed with y.  Both arguments and the whole expansion are parenthesized,
 * so arbitrary expressions may be passed.
 * NOTE(review): presumably intended for the int _I argument of
 * _mm_shuffle_pd, with x and y each 0 or 1 -- confirm against the
 * intrinsic's documentation.
 */
#define _MM_SHUFFLE2(x,y) ((y) | ((x) << 1))
/*****************************************************/
/* INTRINSICS FUNCTION PROTOTYPES START HERE */
/*****************************************************/
#if defined __cplusplus
/* Begin "C": intrinsics use C name-mangling. */
extern "C" {
#endif  /* defined __cplusplus */
/*
 * DP, arithmetic
 *
 * Prototypes only.  Every function returns __m128d and takes two __m128d
 * operands (_A, _B), except _mm_sqrt_pd, which takes only _A.
 */
extern __m128d _mm_add_sd(__m128d _A, __m128d _B);
extern __m128d _mm_add_pd(__m128d _A, __m128d _B);
extern __m128d _mm_sub_sd(__m128d _A, __m128d _B);
extern __m128d _mm_sub_pd(__m128d _A, __m128d _B);
extern __m128d _mm_mul_sd(__m128d _A, __m128d _B);
extern __m128d _mm_mul_pd(__m128d _A, __m128d _B);
extern __m128d _mm_sqrt_sd(__m128d _A, __m128d _B);
extern __m128d _mm_sqrt_pd(__m128d _A);
extern __m128d _mm_div_sd(__m128d _A, __m128d _B);
extern __m128d _mm_div_pd(__m128d _A, __m128d _B);
extern __m128d _mm_min_sd(__m128d _A, __m128d _B);
extern __m128d _mm_min_pd(__m128d _A, __m128d _B);
extern __m128d _mm_max_sd(__m128d _A, __m128d _B);
extern __m128d _mm_max_pd(__m128d _A, __m128d _B);
/*
 * DP, logicals
 *
 * Prototypes only.  Each takes two __m128d operands (_A, _B) and
 * returns __m128d.
 */
extern __m128d _mm_and_pd(__m128d _A, __m128d _B);
extern __m128d _mm_andnot_pd(__m128d _A, __m128d _B);
extern __m128d _mm_or_pd(__m128d _A, __m128d _B);
extern __m128d _mm_xor_pd(__m128d _A, __m128d _B);
/*
 * DP, comparisons
 *
 * Prototypes only.  Each comparison in this group takes two __m128d
 * operands (_A, _B) and returns __m128d.  Predicates are declared in
 * _sd / _pd pairs: eq, lt, le, gt, ge, neq, nlt, nle, ngt, nge, ord, unord.
 */
extern __m128d _mm_cmpeq_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpeq_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmplt_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmplt_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmple_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmple_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpgt_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpgt_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpge_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpge_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpneq_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpneq_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpnlt_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpnlt_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpnle_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpnle_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpngt_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpngt_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpnge_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpnge_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpord_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpord_sd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpunord_pd(__m128d _A, __m128d _B);
extern __m128d _mm_cmpunord_sd(__m128d _A, __m128d _B);
/*
 * DP comparisons with an int result.
 *
 * Prototypes only.  The _mm_comi*_sd and _mm_ucomi*_sd families each take
 * two __m128d operands (_A, _B) and return int, for the predicates
 * eq, lt, le, gt, ge and neq.
 */
extern int _mm_comieq_sd(__m128d _A, __m128d _B);
extern int _mm_comilt_sd(__m128d _A, __m128d _B);
extern int _mm_comile_sd(__m128d _A, __m128d _B);
extern int _mm_comigt_sd(__m128d _A, __m128d _B);
extern int _mm_comige_sd(__m128d _A, __m128d _B);
extern int _mm_comineq_sd(__m128d _A, __m128d _B);
extern int _mm_ucomieq_sd(__m128d _A, __m128d _B);
extern int _mm_ucomilt_sd(__m128d _A, __m128d _B);
extern int _mm_ucomile_sd(__m128d _A, __m128d _B);
extern int _mm_ucomigt_sd(__m128d _A, __m128d _B);
extern int _mm_ucomige_sd(__m128d _A, __m128d _B);
extern int _mm_ucomineq_sd(__m128d _A, __m128d _B);
/*
 * DP, converts
 *
 * Prototypes only.  Conversions among __m128d, __m128, __m128i and int.
 * The three prototypes that use __m64 are declared only when _M_IX86
 * is defined.
 */
extern __m128d _mm_cvtepi32_pd(__m128i _A);
extern __m128i _mm_cvtpd_epi32(__m128d _A);
extern __m128i _mm_cvttpd_epi32(__m128d _A);
extern __m128  _mm_cvtepi32_ps(__m128i _A);
extern __m128i _mm_cvtps_epi32(__m128 _A);
extern __m128i _mm_cvttps_epi32(__m128 _A);
extern __m128  _mm_cvtpd_ps(__m128d _A);
extern __m128d _mm_cvtps_pd(__m128 _A);
extern __m128  _mm_cvtsd_ss(__m128 _A, __m128d _B);
extern __m128d _mm_cvtss_sd(__m128d _A, __m128 _B);

extern int     _mm_cvtsd_si32(__m128d _A);
extern int     _mm_cvttsd_si32(__m128d _A);
extern __m128d _mm_cvtsi32_sd(__m128d _A, int _B);

#if defined(_M_IX86)
extern __m64   _mm_cvtpd_pi32(__m128d _A);
extern __m64   _mm_cvttpd_pi32(__m128d _A);
extern __m128d _mm_cvtpi32_pd(__m64 _A);
#endif
/*
 * DP, misc
 *
 * Prototypes only.  _mm_movemask_pd returns int; the others return
 * __m128d.  _mm_shuffle_pd takes an extra int argument (_I).
 */
extern __m128d _mm_unpackhi_pd(__m128d _A, __m128d _B);
extern __m128d _mm_unpacklo_pd(__m128d _A, __m128d _B);
extern int     _mm_movemask_pd(__m128d _A);
extern __m128d _mm_shuffle_pd(__m128d _A, __m128d _B, int _I);
/*
 * DP, loads
 *
 * Prototypes only.  Each takes a pointer to const double (_Dp) and
 * returns __m128d; _mm_loadh_pd and _mm_loadl_pd additionally take an
 * __m128d operand (_A) first.
 */
extern __m128d _mm_load_pd(const double *_Dp);
extern __m128d _mm_load1_pd(const double *_Dp);
extern __m128d _mm_loadr_pd(const double *_Dp);
extern __m128d _mm_loadu_pd(const double *_Dp);
extern __m128d _mm_load_sd(const double *_Dp);
extern __m128d _mm_loadh_pd(__m128d _A, const double *_Dp);
extern __m128d _mm_loadl_pd(__m128d _A, const double *_Dp);
/*
 * DP, sets
 *
 * Prototypes only; all return __m128d.  _mm_set_pd names its parameters
 * (_Z, _Y) while _mm_setr_pd names them (_Y, _Z).  _mm_move_sd, which
 * takes two __m128d operands, is declared in this group as well.
 */
extern __m128d _mm_set_sd(double _W);
extern __m128d _mm_set1_pd(double _A);
extern __m128d _mm_set_pd(double _Z, double _Y);
extern __m128d _mm_setr_pd(double _Y, double _Z);
extern __m128d _mm_setzero_pd(void);
extern __m128d _mm_move_sd(__m128d _A, __m128d _B);
/*
 * DP, stores
 *
 * Prototypes only.  Each takes a destination pointer to double (_Dp)
 * followed by an __m128d value (_A) and returns void.
 */
extern void _mm_store_sd(double *_Dp, __m128d _A);
extern void _mm_store1_pd(double *_Dp, __m128d _A);
extern void _mm_store_pd(double *_Dp, __m128d _A);
extern void _mm_storeu_pd(double *_Dp, __m128d _A);
extern void _mm_storer_pd(double *_Dp, __m128d _A);
extern void _mm_storeh_pd(double *_Dp, __m128d _A);
extern void _mm_storel_pd(double *_Dp, __m128d _A);
/*
 * Integer, arithmetic
 *
 * Prototypes only.  The __m128i functions take two __m128i operands
 * (_A, _B) and return __m128i.  The three __m64 variants (_mm_add_si64,
 * _mm_mul_su32, _mm_sub_si64) are declared only when _M_IX86 is defined.
 */
extern __m128i _mm_add_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_add_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_add_epi32(__m128i _A, __m128i _B);
#if defined(_M_IX86)
extern __m64 _mm_add_si64(__m64 _A, __m64 _B);
#endif
extern __m128i _mm_add_epi64(__m128i _A, __m128i _B);
extern __m128i _mm_adds_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_adds_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_adds_epu8(__m128i _A, __m128i _B);
extern __m128i _mm_adds_epu16(__m128i _A, __m128i _B);
extern __m128i _mm_avg_epu8(__m128i _A, __m128i _B);
extern __m128i _mm_avg_epu16(__m128i _A, __m128i _B);
extern __m128i _mm_madd_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_max_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_max_epu8(__m128i _A, __m128i _B);
extern __m128i _mm_min_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_min_epu8(__m128i _A, __m128i _B);
extern __m128i _mm_mulhi_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_mulhi_epu16(__m128i _A, __m128i _B);
extern __m128i _mm_mullo_epi16(__m128i _A, __m128i _B);
#if defined(_M_IX86)
extern __m64 _mm_mul_su32(__m64 _A, __m64 _B);
#endif
extern __m128i _mm_mul_epu32(__m128i _A, __m128i _B);
extern __m128i _mm_sad_epu8(__m128i _A, __m128i _B);
extern __m128i _mm_sub_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_sub_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_sub_epi32(__m128i _A, __m128i _B);
#if defined(_M_IX86)
extern __m64 _mm_sub_si64(__m64 _A, __m64 _B);
#endif
extern __m128i _mm_sub_epi64(__m128i _A, __m128i _B);
extern __m128i _mm_subs_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_subs_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_subs_epu8(__m128i _A, __m128i _B);
extern __m128i _mm_subs_epu16(__m128i _A, __m128i _B);
/*
 * Integer, logicals
 *
 * Prototypes only.  Each takes two __m128i operands (_A, _B) and
 * returns __m128i.
 */
extern __m128i _mm_and_si128(__m128i _A, __m128i _B);
extern __m128i _mm_andnot_si128(__m128i _A, __m128i _B);
extern __m128i _mm_or_si128(__m128i _A, __m128i _B);
extern __m128i _mm_xor_si128(__m128i _A, __m128i _B);
/*
 * Integer, shifts
 *
 * Prototypes only; all return __m128i.  The sll/sra/srl forms take the
 * count as an __m128i (_Count); the slli/srai/srli forms take it as an
 * int (_Count, or _Imm for the _si128 variants).
 * _mm_bslli_si128 and _mm_bsrli_si128 are macro aliases for
 * _mm_slli_si128 and _mm_srli_si128 respectively.
 */
extern __m128i _mm_slli_si128(__m128i _A, int _Imm);
#define _mm_bslli_si128 _mm_slli_si128
extern __m128i _mm_slli_epi16(__m128i _A, int _Count);
extern __m128i _mm_sll_epi16(__m128i _A, __m128i _Count);
extern __m128i _mm_slli_epi32(__m128i _A, int _Count);
extern __m128i _mm_sll_epi32(__m128i _A, __m128i _Count);
extern __m128i _mm_slli_epi64(__m128i _A, int _Count);
extern __m128i _mm_sll_epi64(__m128i _A, __m128i _Count);

extern __m128i _mm_srai_epi16(__m128i _A, int _Count);
extern __m128i _mm_sra_epi16(__m128i _A, __m128i _Count);
extern __m128i _mm_srai_epi32(__m128i _A, int _Count);
extern __m128i _mm_sra_epi32(__m128i _A, __m128i _Count);

extern __m128i _mm_srli_si128(__m128i _A, int _Imm);
#define _mm_bsrli_si128 _mm_srli_si128
extern __m128i _mm_srli_epi16(__m128i _A, int _Count);
extern __m128i _mm_srl_epi16(__m128i _A, __m128i _Count);
extern __m128i _mm_srli_epi32(__m128i _A, int _Count);
extern __m128i _mm_srl_epi32(__m128i _A, __m128i _Count);
extern __m128i _mm_srli_epi64(__m128i _A, int _Count);
extern __m128i _mm_srl_epi64(__m128i _A, __m128i _Count);
/*
 * Integer, comparisons
 *
 * Prototypes only.  cmpeq / cmpgt / cmplt are each declared for the
 * epi8, epi16 and epi32 suffixes; every one takes two __m128i operands
 * (_A, _B) and returns __m128i.
 */
extern __m128i _mm_cmpeq_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_cmpeq_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_cmpeq_epi32(__m128i _A, __m128i _B);
extern __m128i _mm_cmpgt_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_cmpgt_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_cmpgt_epi32(__m128i _A, __m128i _B);
extern __m128i _mm_cmplt_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_cmplt_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_cmplt_epi32(__m128i _A, __m128i _B);
/*
 * Integer, converts
 *
 * Prototypes only: one takes an int and returns __m128i, the other
 * takes an __m128i and returns int.
 */
extern __m128i _mm_cvtsi32_si128(int _A);
extern int     _mm_cvtsi128_si32(__m128i _A);
/*
 * Integer, misc
 *
 * Prototypes only: pack, extract/insert, movemask, shuffle and unpack.
 * _mm_extract_epi16 and _mm_movemask_epi8 return int; the rest return
 * __m128i.  Parameters named _Imm are declared as plain int.
 */
extern __m128i _mm_packs_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_packs_epi32(__m128i _A, __m128i _B);
extern __m128i _mm_packus_epi16(__m128i _A, __m128i _B);

extern int     _mm_extract_epi16(__m128i _A, int _Imm);
extern __m128i _mm_insert_epi16(__m128i _A, int _B, int _Imm);
extern int     _mm_movemask_epi8(__m128i _A);

extern __m128i _mm_shuffle_epi32(__m128i _A, int _Imm);
extern __m128i _mm_shufflehi_epi16(__m128i _A, int _Imm);
extern __m128i _mm_shufflelo_epi16(__m128i _A, int _Imm);

extern __m128i _mm_unpackhi_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_unpackhi_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_unpackhi_epi32(__m128i _A, __m128i _B);
extern __m128i _mm_unpackhi_epi64(__m128i _A, __m128i _B);
extern __m128i _mm_unpacklo_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_unpacklo_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_unpacklo_epi32(__m128i _A, __m128i _B);
extern __m128i _mm_unpacklo_epi64(__m128i _A, __m128i _B);
/*
 * Integer, loads
 *
 * Prototypes only.  Each takes a pointer to const __m128i (_P) and
 * returns __m128i.
 */
extern __m128i _mm_load_si128(const __m128i *_P);
extern __m128i _mm_loadu_si128(const __m128i *_P);
extern __m128i _mm_loadl_epi64(const __m128i *_P);
/*
 * Integer, sets
 *
 * Prototypes only; all return __m128i.  The multi-argument _mm_set_*
 * forms name their parameters from the highest index down to 0
 * (_Q1,_Q0 / _I1,_I0 / _I3.._I0 / _W7.._W0 / _B15.._B0).
 * The variants taking __m64 (_mm_set_epi64, _mm_set1_epi64) are declared
 * only when _M_IX86 is defined.
 */
#if defined(_M_IX86)
extern __m128i _mm_set_epi64(__m64 _Q1, __m64 _Q0);
#endif
extern __m128i _mm_set_epi64x(__int64 _I1, __int64 _I0);
extern __m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0);
extern __m128i _mm_set_epi16(short _W7, short _W6, short _W5, short _W4,
                             short _W3, short _W2, short _W1, short _W0);
extern __m128i _mm_set_epi8(char _B15, char _B14, char _B13, char _B12,
                            char _B11, char _B10, char _B9, char _B8,
                            char _B7, char _B6, char _B5, char _B4,
                            char _B3, char _B2, char _B1, char _B0);
#if defined(_M_IX86)
extern __m128i _mm_set1_epi64(__m64 _Q);
#endif
/*
 * _mm_set1_epi64x: takes one __int64 and returns __m128i.
 *
 * The parameter carries a reserved-style name (_I, as in _mm_set1_epi32)
 * rather than a plain lowercase identifier, so that a user macro with
 * the same spelling cannot rewrite this prototype when the header is
 * included.  Parameter names in a prototype are documentation only; the
 * type and arity are unchanged.
 */
extern __m128i _mm_set1_epi64x(__int64 _I);
/*
 * Integer sets, continued: single-scalar _mm_set1_* forms, _mm_setl_epi64,
 * and the _mm_setr_* forms.
 *
 * Prototypes only; all return __m128i.  The _mm_setr_* prototypes here
 * name their parameters from index 0 upward (_Q0,_Q1 / _I0.._I3 /
 * _W0.._W7).  _mm_setr_epi64 takes __m64 and is declared only when
 * _M_IX86 is defined.
 */
extern __m128i _mm_set1_epi32(int _I);
extern __m128i _mm_set1_epi16(short _W);
extern __m128i _mm_set1_epi8(char _B);
extern __m128i _mm_setl_epi64(__m128i _Q);
#if defined(_M_IX86)
extern __m128i _mm_setr_epi64(__m64 _Q0, __m64 _Q1);
#endif
extern __m128i _mm_setr_epi32(int _I0, int _I1, int _I2, int _I3);
extern __m128i _mm_setr_epi16(short _W0, short _W1, short _W2, short _W3,
                              short _W4, short _W5, short _W6, short _W7);
/*
 * _mm_setr_epi8: takes sixteen char values and returns __m128i.
 *
 * Parameters are named in ascending order (_B0 first), matching the
 * _I0.._I3 and _W0.._W7 naming used by _mm_setr_epi32 and
 * _mm_setr_epi16.  Parameter names in a prototype are documentation
 * only: the number, types and order of the arguments are exactly
 * sixteen chars, so existing callers are unaffected.
 * NOTE(review): this assumes the first argument is the lowest-order
 * byte, as the other _setr_ names imply -- confirm against the
 * intrinsic's reference documentation.
 */
extern __m128i _mm_setr_epi8(char _B0, char _B1, char _B2, char _B3,
                             char _B4, char _B5, char _B6, char _B7,
                             char _B8, char _B9, char _B10, char _B11,
                             char _B12, char _B13, char _B14, char _B15);
/* _mm_setzero_si128: takes no arguments and returns __m128i. */
extern __m128i _mm_setzero_si128(void);
/*
 * Integer, stores
 *
 * Prototypes only; all return void.  The first three take a destination
 * pointer to __m128i (_P) followed by an __m128i value.
 * _mm_maskmoveu_si128 instead takes two __m128i values (_D, _N)
 * followed by a char pointer (_P).
 */
extern void _mm_store_si128(__m128i *_P, __m128i _B);
extern void _mm_storeu_si128(__m128i *_P, __m128i _B);
extern void _mm_storel_epi64(__m128i *_P, __m128i _Q);
extern void _mm_maskmoveu_si128(__m128i _D, __m128i _N, char *_P);
/*
 * Integer, moves
 *
 * Prototypes only.  _mm_move_epi64 maps __m128i to __m128i.  The two
 * prototypes converting between __m64 and __m128i are declared only
 * when _M_IX86 is defined.
 */
extern __m128i _mm_move_epi64(__m128i _Q);
#if defined(_M_IX86)
extern __m128i _mm_movpi64_epi64(__m64 _Q);
extern __m64   _mm_movepi64_pi64(__m128i _Q);
#endif
/*
 * Cacheability support
 *
 * Prototypes only; all return void.  The _mm_stream_* forms take a
 * destination pointer followed by the value; _mm_clflush takes a pointer
 * to const void; _mm_lfence, _mm_mfence and _mm_pause take no arguments.
 */
extern void _mm_stream_pd(double *_Dp, __m128d _A);
extern void _mm_stream_si128(__m128i *_P, __m128i _A);
extern void _mm_clflush(const void *_P);
extern void _mm_lfence(void);
extern void _mm_mfence(void);
extern void _mm_stream_si32(int *_P, int _I);
extern void _mm_pause(void);
/*
 * New convert to float
 *
 * Prototype only: takes an __m128d (_A) and returns double.
 */
extern double _mm_cvtsd_f64(__m128d _A);
/*
 * Support for casting between various SP, DP, INT vector types.
 * Note that these do no conversion of values, they just change
 * the type.
 *
 * One prototype is declared for each ordered pair of distinct types
 * among __m128, __m128d and __m128i; parameters are left unnamed.
 */
extern __m128  _mm_castpd_ps(__m128d);
extern __m128i _mm_castpd_si128(__m128d);
extern __m128d _mm_castps_pd(__m128);
extern __m128i _mm_castps_si128(__m128);
extern __m128  _mm_castsi128_ps(__m128i);
extern __m128d _mm_castsi128_pd(__m128i);
/*
 * Support for 64-bit extension intrinsics
 *
 * Declared only when _M_X64 is defined.  These prototypes convert
 * between __int64 and the __m128d / __m128i vector types.
 */
#if defined (_M_X64)
extern __int64 _mm_cvtsd_si64(__m128d);
extern __int64 _mm_cvttsd_si64(__m128d);
extern __m128d _mm_cvtsi64_sd(__m128d, __int64);
extern __m128i _mm_cvtsi64_si128(__int64);
extern __int64 _mm_cvtsi128_si64(__m128i);

/*
 * Alternate intrinsic name definitions
 *
 * The #define must end at the line break: anything after the replacement
 * name on the same line would become part of the macro body, and the
 * conditional would never be closed.
 * NOTE(review): _mm_stream_si64x is not declared in this header;
 * presumably it is provided by another header -- confirm.
 */
#define _mm_stream_si64 _mm_stream_si64x
#endif  /* defined (_M_X64) */
#if defined __cplusplus
};      /* End "C" */
#endif  /* defined __cplusplus */

/*
 * Close the three conditionals opened at the top of the file, innermost
 * first.  Each #endif needs its own line; directives written after the
 * first one on the same line are not processed as directives.
 */
#endif  /* defined (_M_CEE_PURE) */
#endif  /* __midl */
#endif  /* _INCLUDED_EMM */