1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
use super::super::*;
use x86::sse2::*;

// Raw vector operations declared with the "platform-intrinsic" ABI, i.e. the
// bodies are supplied by the compiler rather than by Rust code in this file.
// The traits below are all named `Sse41*`, so these are presumably the SSE4.1
// intrinsics (Intel `_mm_*` names with an `x86_` prefix) — confirm.
//
// `dead_code` is allowed because not every declaration is wrapped: in the
// visible part of this file `x86_mm_dp_ps`, `x86_mm_dp_pd` and
// `x86_mm_mpsadbw_epu8` have no caller.
#[allow(dead_code)]
extern "platform-intrinsic" {
    // Not wrapped by any trait in the visible part of this file.
    // NOTE(review): `z` is presumably the immediate control mask of the
    // underlying instruction and may need to be a compile-time constant — confirm.
    fn x86_mm_dp_ps(x: f32x4, y: f32x4, z: i32) -> f32x4;
    fn x86_mm_dp_pd(x: f64x2, y: f64x2, z: i32) -> f64x2;
    // Maximum of two vectors; exposed as `max` on the matching vector type.
    fn x86_mm_max_epi8(x: i8x16, y: i8x16) -> i8x16;
    fn x86_mm_max_epu16(x: u16x8, y: u16x8) -> u16x8;
    fn x86_mm_max_epi32(x: i32x4, y: i32x4) -> i32x4;
    fn x86_mm_max_epu32(x: u32x4, y: u32x4) -> u32x4;
    // Minimum of two vectors; exposed as `min` on the matching vector type.
    fn x86_mm_min_epi8(x: i8x16, y: i8x16) -> i8x16;
    fn x86_mm_min_epu16(x: u16x8, y: u16x8) -> u16x8;
    fn x86_mm_min_epi32(x: i32x4, y: i32x4) -> i32x4;
    fn x86_mm_min_epu32(x: u32x4, y: u32x4) -> u32x4;
    // Exposed as `Sse41U16x8::minpos`.
    fn x86_mm_minpos_epu16(x: u16x8) -> u16x8;
    // Not wrapped by any trait in the visible part of this file.
    fn x86_mm_mpsadbw_epu8(x: u8x16, y: u8x16, z: i32) -> u16x8;
    // Exposed as `Sse41I32x4::low_mul` (note the widened `i64x2` result).
    fn x86_mm_mul_epi32(x: i32x4, y: i32x4) -> i64x2;
    // Exposed as `Sse41I32x4::packus` (note the narrowed `u16x8` result).
    fn x86_mm_packus_epi32(x: i32x4, y: i32x4) -> u16x8;
    // Whole-register tests returning an `i32`; exposed through `Sse41U64x2`.
    fn x86_mm_testc_si128(x: u64x2, y: u64x2) -> i32;
    fn x86_mm_testnzc_si128(x: u64x2, y: u64x2) -> i32;
    fn x86_mm_testz_si128(x: u64x2, y: u64x2) -> i32;
}

// 32 bit floats

/// Extension trait for `f32x4`; it currently declares no methods.
///
/// The `x86_mm_dp_ps` intrinsic declared in this file operates on `f32x4`
/// but is not exposed through this trait.
pub trait Sse41F32x4 {}
impl Sse41F32x4 for f32x4 {}

// 64 bit floats

/// Extension trait for `f64x2`; it currently declares no methods.
///
/// The `x86_mm_dp_pd` intrinsic declared in this file operates on `f64x2`
/// but is not exposed through this trait.
pub trait Sse41F64x2 {}
impl Sse41F64x2 for f64x2 {}

// 64 bit integers

/// Whole-vector bit tests that combine `self` with `other` and yield an `i32`.
///
/// The `u64x2` implementation forwards each method to the
/// `x86_mm_test{c,nzc,z}_si128` intrinsic of the same name, passing
/// `(self, other)` in that order.
///
/// NOTE(review): the per-method meanings below follow Intel's documentation
/// for `_mm_testc_si128` / `_mm_testnzc_si128` / `_mm_testz_si128` (PTEST)
/// and are not verifiable from this file — confirm.
pub trait Sse41U64x2 {
    /// Presumably 1 when `!self & other` has no bits set (every bit of
    /// `other` is also set in `self`), otherwise 0 — confirm.
    fn testc(self, other: Self) -> i32;
    /// Presumably 1 when both `self & other` and `!self & other` have at
    /// least one bit set, otherwise 0 — confirm.
    fn testnzc(self, other: Self) -> i32;
    /// Presumably 1 when `self & other` has no bits set, otherwise 0 — confirm.
    fn testz(self, other: Self) -> i32;
}
// Each method is a direct call to the intrinsic with the matching name,
// passing `(self, other)` unchanged.
//
// SAFETY (applies to every `unsafe` block in this impl): the calls take and
// return plain values, so no memory invariants are involved.
// NOTE(review): this assumes the code only runs on CPUs that support the
// instructions; nothing in this file checks that — confirm where it is gated.
impl Sse41U64x2 for u64x2 {
    #[inline]
    fn testc(self, other: Self) -> i32 {
        unsafe { x86_mm_testc_si128(self, other) }
    }
    #[inline]
    fn testnzc(self, other: Self) -> i32 {
        unsafe { x86_mm_testnzc_si128(self, other) }
    }
    #[inline]
    fn testz(self, other: Self) -> i32 {
        unsafe { x86_mm_testz_si128(self, other) }
    }
}
/// Extension trait for `i64x2`; it currently declares no methods.
pub trait Sse41I64x2 {}
impl Sse41I64x2 for i64x2 {}

/// Extension trait for `bool64ix2`; it currently declares no methods.
pub trait Sse41Bool64ix2 {}
impl Sse41Bool64ix2 for bool64ix2 {}

// 32 bit integers

/// Maximum and minimum of two `u32x4` vectors.
///
/// The `u32x4` implementation forwards to `x86_mm_max_epu32` and
/// `x86_mm_min_epu32`.
pub trait Sse41U32x4 {
    /// Presumably the lane-wise unsigned maximum of `self` and `other` —
    /// confirm against Intel's `_mm_max_epu32`.
    fn max(self, other: Self) -> Self;
    /// Presumably the lane-wise unsigned minimum of `self` and `other` —
    /// confirm against Intel's `_mm_min_epu32`.
    fn min(self, other: Self) -> Self;
}
// Both methods are direct calls to the `epu32` intrinsics, passing
// `(self, other)` unchanged.
//
// SAFETY (applies to every `unsafe` block in this impl): the calls take and
// return plain values, so no memory invariants are involved.
// NOTE(review): this assumes the code only runs on CPUs that support the
// instructions; nothing in this file checks that — confirm where it is gated.
impl Sse41U32x4 for u32x4 {
    #[inline]
    fn max(self, other: Self) -> Self {
        unsafe { x86_mm_max_epu32(self, other) }
    }
    #[inline]
    fn min(self, other: Self) -> Self {
        unsafe { x86_mm_min_epu32(self, other) }
    }
}
/// Operations on `i32x4`: max/min plus a widening multiply and a narrowing
/// pack.
///
/// The `i32x4` implementation forwards to `x86_mm_max_epi32`,
/// `x86_mm_min_epi32`, `x86_mm_mul_epi32` and `x86_mm_packus_epi32`.
///
/// NOTE(review): the per-method meanings below follow Intel's documentation
/// for the same-named `_mm_*` intrinsics and are not verifiable from this
/// file — confirm.
pub trait Sse41I32x4 {
    /// Presumably the lane-wise signed maximum of `self` and `other`.
    fn max(self, other: Self) -> Self;
    /// Presumably the lane-wise signed minimum of `self` and `other`.
    fn min(self, other: Self) -> Self;
    /// Multiplies into an `i64x2`. Presumably only the low 32-bit lane of
    /// each 64-bit half (lanes 0 and 2) takes part, hence "low" — confirm.
    fn low_mul(self, other: Self) -> i64x2;
    /// Packs `self` and `other` into one `u16x8`. Presumably `self` supplies
    /// the low four lanes, `other` the high four, with each value clamped to
    /// the `u16` range — confirm.
    fn packus(self, other: Self) -> u16x8;
}
// Every method is a direct call to one intrinsic, passing `(self, other)`
// unchanged. Note that `low_mul` maps to `x86_mm_mul_epi32`; the method name
// differs from the intrinsic name.
//
// SAFETY (applies to every `unsafe` block in this impl): the calls take and
// return plain values, so no memory invariants are involved.
// NOTE(review): this assumes the code only runs on CPUs that support the
// instructions; nothing in this file checks that — confirm where it is gated.
impl Sse41I32x4 for i32x4 {
    #[inline]
    fn max(self, other: Self) -> Self {
        unsafe { x86_mm_max_epi32(self, other) }
    }
    #[inline]
    fn min(self, other: Self) -> Self {
        unsafe { x86_mm_min_epi32(self, other) }
    }

    #[inline]
    fn low_mul(self, other: Self) -> i64x2 {
        unsafe { x86_mm_mul_epi32(self, other) }
    }
    #[inline]
    fn packus(self, other: Self) -> u16x8 {
        unsafe { x86_mm_packus_epi32(self, other) }
    }
}

/// Extension trait for `bool32ix4`; it currently declares no methods.
pub trait Sse41Bool32ix4 {}
impl Sse41Bool32ix4 for bool32ix4 {}

// 16 bit integers

/// Operations on `u16x8`: max/min of two vectors and a horizontal minimum
/// search.
///
/// The `u16x8` implementation forwards to `x86_mm_max_epu16`,
/// `x86_mm_min_epu16` and `x86_mm_minpos_epu16`.
pub trait Sse41U16x8 {
    /// Presumably the lane-wise unsigned maximum of `self` and `other` —
    /// confirm against Intel's `_mm_max_epu16`.
    fn max(self, other: Self) -> Self;
    /// Presumably the lane-wise unsigned minimum of `self` and `other` —
    /// confirm against Intel's `_mm_min_epu16`.
    fn min(self, other: Self) -> Self;
    /// Takes only `self` and returns a `u16x8`. Presumably lane 0 holds the
    /// smallest element, lane 1 its index, and the remaining lanes are zero —
    /// confirm against Intel's `_mm_minpos_epu16`.
    fn minpos(self) -> Self;
}
// Every method is a direct call to the `epu16` intrinsic with the matching
// name, passing its arguments unchanged.
//
// SAFETY (applies to every `unsafe` block in this impl): the calls take and
// return plain values, so no memory invariants are involved.
// NOTE(review): this assumes the code only runs on CPUs that support the
// instructions; nothing in this file checks that — confirm where it is gated.
impl Sse41U16x8 for u16x8 {
    #[inline]
    fn max(self, other: Self) -> Self {
        unsafe { x86_mm_max_epu16(self, other) }
    }
    #[inline]
    fn min(self, other: Self) -> Self {
        unsafe { x86_mm_min_epu16(self, other) }
    }

    #[inline]
    fn minpos(self) -> Self {
        unsafe { x86_mm_minpos_epu16(self) }
    }
}
/// Extension trait for `i16x8`; it currently declares no methods.
pub trait Sse41I16x8 {}
impl Sse41I16x8 for i16x8 {}

/// Extension trait for `bool16ix8`; it currently declares no methods.
pub trait Sse41Bool16ix8 {}
impl Sse41Bool16ix8 for bool16ix8 {}

// 8 bit integers

/// Extension trait for `u8x16`; it currently declares no methods.
///
/// The `x86_mm_mpsadbw_epu8` intrinsic declared in this file operates on
/// `u8x16` but is not exposed through this trait.
pub trait Sse41U8x16 {}
impl Sse41U8x16 for u8x16 {}
/// Maximum and minimum of two `i8x16` vectors.
///
/// The `i8x16` implementation forwards to `x86_mm_max_epi8` and
/// `x86_mm_min_epi8`.
pub trait Sse41I8x16 {
    /// Presumably the lane-wise signed maximum of `self` and `other` —
    /// confirm against Intel's `_mm_max_epi8`.
    fn max(self, other: Self) -> Self;
    /// Presumably the lane-wise signed minimum of `self` and `other` —
    /// confirm against Intel's `_mm_min_epi8`.
    fn min(self, other: Self) -> Self;
}
// Both methods are direct calls to the `epi8` intrinsics, passing
// `(self, other)` unchanged.
//
// SAFETY (applies to every `unsafe` block in this impl): the calls take and
// return plain values, so no memory invariants are involved.
// NOTE(review): this assumes the code only runs on CPUs that support the
// instructions; nothing in this file checks that — confirm where it is gated.
impl Sse41I8x16 for i8x16 {
    #[inline]
    fn max(self, other: Self) -> Self {
        unsafe { x86_mm_max_epi8(self, other) }
    }
    #[inline]
    fn min(self, other: Self) -> Self {
        unsafe { x86_mm_min_epi8(self, other) }
    }
}

/// Extension trait for `bool8ix16`; it currently declares no methods.
pub trait Sse41Bool8ix16 {}
impl Sse41Bool8ix16 for bool8ix16 {}