/*! @header
* The interfaces declared in this header provide "common" elementwise
* operations that are neither math nor logic functions. These are available
* only for floating-point vectors and scalars, except for min, max, abs,
* clamp, and the reduce operations, which also support integer vectors.
*
* simd_abs(x) Absolute value of x. Also available as fabs
* for floating-point vectors. If x is the
* smallest signed integer, x is returned.
*
* simd_max(x,y) Returns the maximum of x and y. Also available
* as fmax for floating-point vectors.
*
* simd_min(x,y) Returns the minimum of x and y. Also available
* as fmin for floating-point vectors.
*
* simd_clamp(x,min,max) x clamped to the range [min, max].
*
* simd_sign(x) -1 if x is less than zero, 0 if x is zero or
* NaN, and +1 if x is greater than zero.
*
* simd_mix(x,y,t) If t is not in the range [0,1], the result is
* simd_lerp(x,y,t) undefined. Otherwise the result is x+(y-x)*t,
* which linearly interpolates between x and y.
*
* simd_recip(x) An approximation to 1/x. If x is very near the
* limits of representable values, or is infinity
* or NaN, the result is undefined. There are
* two variants of this function:
*
* simd_precise_recip(x)
*
* and
*
* simd_fast_recip(x).
*
* The "precise" variant is accurate to a few ULPs,
* whereas the "fast" variant may have as little
* as 11 bits of accuracy in float and about 22
* bits in double.
*
* The function simd_recip(x) resolves to
* simd_precise_recip(x) ordinarily, but to
* simd_fast_recip(x) when used in a translation
* unit compiled with -ffast-math (when
* -ffast-math is in effect, you may still use the
* precise version of this function by calling it
* explicitly by name).
*
* simd_rsqrt(x) An approximation to 1/sqrt(x). If x is
* infinity or NaN, the result is undefined.
* There are two variants of this function:
*
* simd_precise_rsqrt(x)
*
* and
*
* simd_fast_rsqrt(x).
*
* The "precise" variant is accurate to a few ULPs,
* whereas the "fast" variant may have as little
* as 11 bits of accuracy in float and about 22
* bits in double.
*
* The function simd_rsqrt(x) resolves to
* simd_precise_rsqrt(x) ordinarily, but to
* simd_fast_rsqrt(x) when used in a translation
* unit compiled with -ffast-math (when
* -ffast-math is in effect, you may still use the
* precise version of this function by calling it
* explicitly by name).
*
* simd_fract(x) The "fractional part" of x, which lies strictly
* in the range [0, 0x1.fffffep-1].
*
* simd_step(edge,x) 0 if x < edge, and 1 otherwise.
*
* simd_smoothstep(edge0,edge1,x) 0 if x <= edge0, 1 if x >= edge1, and
* a Hermite interpolation between 0 and 1 if
* edge0 < x < edge1.
*
* simd_reduce_add(x) Sum of the elements of x.
*
* simd_reduce_min(x) Minimum of the elements of x.
*
* simd_reduce_max(x) Maximum of the elements of x.
*
* simd_equal(x,y) True if and only if every lane of x is equal
* to the corresponding lane of y.
*
* The following common functions are available in the simd:: namespace:
*
* C++ Function Equivalent C Function
* --------------------------------------------------------------------
* simd::abs(x) simd_abs(x)
* simd::max(x,y) simd_max(x,y)
* simd::min(x,y) simd_min(x,y)
* simd::clamp(x,min,max) simd_clamp(x,min,max)
* simd::sign(x) simd_sign(x)
* simd::mix(x,y,t) simd_mix(x,y,t)
* simd::lerp(x,y,t) simd_lerp(x,y,t)
* simd::recip(x) simd_recip(x)
* simd::rsqrt(x) simd_rsqrt(x)
* simd::fract(x) simd_fract(x)
* simd::step(edge,x) simd_step(edge,x)
* simd::smoothstep(e0,e1,x) simd_smoothstep(e0,e1,x)
* simd::reduce_add(x) simd_reduce_add(x)
* simd::reduce_max(x) simd_reduce_max(x)
* simd::reduce_min(x) simd_reduce_min(x)
* simd::equal(x,y) simd_equal(x,y)
*
* simd::precise::recip(x) simd_precise_recip(x)
* simd::precise::rsqrt(x) simd_precise_rsqrt(x)
*
* simd::fast::recip(x) simd_fast_recip(x)
* simd::fast::rsqrt(x) simd_fast_rsqrt(x)
*
* @copyright 2014-2017 Apple, Inc. All rights reserved.
* @unsorted */
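/* Example (illustrative sketch, not part of the original header): a few of the
 * common operations described above, written against the C interface. The lane
 * values in the comments follow directly from the definitions above; scalar
 * bounds to simd_clamp broadcast to every lane, as noted in its discussion.
 *
 *     #include <simd/simd.h>
 *
 *     simd_float4 x = simd_make_float4(-2.0f, 0.5f, 3.0f, -0.25f);
 *     simd_float4 a = simd_abs(x);                    // {  2, 0.5, 3,  0.25 }
 *     simd_float4 c = simd_clamp(x, -1.0f, 1.0f);     // { -1, 0.5, 1, -0.25 }
 *     float       s = simd_reduce_add(a);             // 5.75
 */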
#ifndef SIMD_COMMON_HEADER
#define SIMD_COMMON_HEADER
#include <simd/base.h>
#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
#include <simd/vector_make.h>
#include <simd/logic.h>
#include <simd/math.h>
#ifdef __cplusplus
extern "C" {
#endif
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x);
/*! @abstract The elementwise absolute value of x. */
static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x);
/*! @abstract The elementwise absolute value of x.
* @discussion Deprecated. Use simd_abs(x) instead. */
#define vector_abs simd_abs
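/* Example (illustrative, assuming the declarations above): elementwise
 * absolute value. Note the integer caveat from the header overview: a lane
 * holding the most negative representable value is returned unchanged.
 *
 *     simd_int4 v = simd_make_int4(-3, 7, INT32_MIN, 0);
 *     simd_int4 a = simd_abs(v);   // { 3, 7, INT32_MIN, 0 }
 */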
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC float simd_max(float x, float y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC double simd_max(double x, double y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y);
/*! @abstract The elementwise maximum of x and y. */
static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y);
/*! @abstract The elementwise maximum of x and y.
* @discussion Deprecated. Use simd_max(x,y) instead. */
#define vector_max simd_max
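/* Example (illustrative): lane-by-lane maximum of two vectors.
 *
 *     simd_float2 p = simd_make_float2(1.0f, -4.0f);
 *     simd_float2 q = simd_make_float2(0.5f,  2.0f);
 *     simd_float2 m = simd_max(p, q);   // { 1, 2 }
 */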
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC float simd_min(float x, float y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC double simd_min(double x, double y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y);
/*! @abstract The elementwise minimum of x and y. */
static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y);
/*! @abstract The elementwise minimum of x and y.
* @discussion Deprecated. Use simd_min(x,y) instead. */
#define vector_min simd_min
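/* Example (illustrative): lane-by-lane minimum, the counterpart of simd_max.
 *
 *     simd_float2 p = simd_make_float2(1.0f, -4.0f);
 *     simd_float2 q = simd_make_float2(0.5f,  2.0f);
 *     simd_float2 m = simd_min(p, q);   // { 0.5, -4 }
 */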
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC float simd_clamp(float x, float min, float max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC double simd_clamp(double x, double min, double max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Note that if you want to clamp all lanes to the same range,
* you can use a scalar value for min and max. */
static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max);
/*! @abstract x clamped to the range [min, max].
* @discussion Deprecated. Use simd_clamp(x,min,max) instead. */
#define vector_clamp simd_clamp
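/* Example (illustrative): clamping color components to [0, 1]. Per the
 * discussion above, a scalar min and max clamp every lane to the same range.
 *
 *     simd_float4 rgba    = simd_make_float4(1.3f, -0.2f, 0.75f, 1.0f);
 *     simd_float4 clamped = simd_clamp(rgba, 0.0f, 1.0f);   // { 1, 0, 0.75, 1 }
 */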
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC float simd_sign(float x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC double simd_sign(double x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */
static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x);
/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise.
* @discussion Deprecated. Use simd_sign(x) instead. */
#define vector_sign simd_sign
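/* Example (illustrative): the piecewise sign function described above; per the
 * header overview, a NaN lane maps to 0.
 *
 *     simd_float4 v = simd_make_float4(-2.5f, 0.0f, 7.0f, NAN);
 *     simd_float4 s = simd_sign(v);   // { -1, 0, 1, 0 }
 */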
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC float simd_mix(float x, float y, float t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC double simd_mix(double x, double y, double t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1 */
static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t);
/*! @abstract Linearly interpolates between x and y, taking the value x when
* t=0 and y when t=1
* @discussion Deprecated. Use simd_mix(x, y, t) instead. */
#define vector_mix simd_mix
#define simd_lerp simd_mix
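/* Example (illustrative): linear interpolation. With t = 0.25 in every lane,
 * the result is x + (y - x)*t, one quarter of the way from x to y.
 *
 *     simd_float3 x = simd_make_float3(0.0f, 10.0f, -4.0f);
 *     simd_float3 y = simd_make_float3(8.0f, 20.0f,  4.0f);
 *     simd_float3 t = simd_make_float3(0.25f, 0.25f, 0.25f);
 *     simd_float3 m = simd_mix(x, y, t);   // { 2, 12.5, -2 }
 */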
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC float simd_precise_recip(float x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC double simd_precise_recip(double x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x);
/*! @abstract A good approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* a few units in the last place (ULPs). */
static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x);
/*! @abstract A good approximation to 1/x.
* @discussion Deprecated. Use simd_precise_recip(x) instead. */
#define vector_precise_recip simd_precise_recip
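/* Example (illustrative): the precise reciprocal, accurate to a few ULPs as
 * described above.
 *
 *     simd_float4 x = simd_make_float4(2.0f, 4.0f, 8.0f, 0.5f);
 *     simd_float4 r = simd_precise_recip(x);   // ~{ 0.5, 0.25, 0.125, 2 }
 */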
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC float simd_fast_recip(float x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC double simd_fast_recip(double x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x);
/*! @abstract A fast approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow; otherwise this function is accurate to
* at least 11 bits for float and 22 bits for double. */
static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x);
/*! @abstract A fast approximation to 1/x.
* @discussion Deprecated. Use simd_fast_recip(x) instead. */
#define vector_fast_recip simd_fast_recip
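/* Example (illustrative): the fast variant trades accuracy (as little as
 * ~11 bits in float) for speed, which is often acceptable in inner loops.
 *
 *     simd_float4 w = simd_make_float4(3.0f, 5.0f, 7.0f, 9.0f);
 *     simd_float4 r = simd_fast_recip(w);   // approximately { 1/3, 1/5, 1/7, 1/9 }
 */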
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC float simd_recip(float x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC double simd_recip(double x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x);
/*! @abstract An approximation to 1/x.
* @discussion If x is very close to the limits of representation, the
* result may overflow or underflow. This function maps to
* simd_fast_recip(x) if -ffast-math is specified, and to
* simd_precise_recip(x) otherwise. */
static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x);
/*! @abstract An approximation to 1/x.
* @discussion Deprecated. Use simd_recip(x) instead. */
#define vector_recip simd_recip
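/* Example (illustrative): simd_recip resolves to the precise variant by
 * default and to the fast variant under -ffast-math; the precise version can
 * still be requested explicitly by name in either mode.
 *
 *     simd_double2 d  = simd_make_double2(3.0, 6.0);
 *     simd_double2 r1 = simd_recip(d);           // fast or precise, per build flags
 *     simd_double2 r2 = simd_precise_recip(d);   // always the precise variant
 */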
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC float simd_precise_rsqrt(float x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC double simd_precise_rsqrt(double x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion This function is accurate to a few units in the last place
* (ULPs). */
static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x);
/*! @abstract A good approximation to 1/sqrt(x).
* @discussion Deprecated. Use simd_precise_rsqrt(x) instead. */
#define vector_precise_rsqrt simd_precise_rsqrt
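/* Example (illustrative): normalizing a vector with the precise reciprocal
 * square root. (simd_length_squared is assumed from the wider simd library; it
 * is not declared in this header.)
 *
 *     simd_float3 v    = simd_make_float3(3.0f, 0.0f, 4.0f);
 *     float       rlen = simd_precise_rsqrt(simd_length_squared(v));   // 1/5
 *     simd_float3 unit = v * rlen;                                     // { 0.6, 0, 0.8 }
 */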
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC float simd_fast_rsqrt(float x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC double simd_fast_rsqrt(double x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion This function is accurate to at least 11 bits for float and
* 22 bits for double. */
static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x);
/*! @abstract A fast approximation to 1/sqrt(x).
* @discussion Deprecated. Use simd_fast_rsqrt(x) instead. */
#define vector_fast_rsqrt simd_fast_rsqrt
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC float simd_rsqrt(float x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC double simd_rsqrt(double x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x);
/*! @abstract An approximation to 1/sqrt(x).
 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
 * specified, and to simd_precise_rsqrt(x) otherwise. */
static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x);
/*! @abstract An approximation to 1/sqrt(x).
* @discussion Deprecated. Use simd_rsqrt(x) instead. */
#define vector_rsqrt simd_rsqrt
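/* Illustrative sketch (not part of the original header): the fast and precise
 * variants can always be called explicitly by name, regardless of whether
 * -ffast-math is in effect:
 *
 *     simd_float4 v = { 1.0f, 4.0f, 9.0f, 16.0f };
 *     simd_float4 quick   = simd_fast_rsqrt(v);     // at least 11-bit accuracy in float
 *     simd_float4 refined = simd_precise_rsqrt(v);  // accurate to a few ULPs
 *     simd_float4 deflt   = simd_rsqrt(v);          // resolves per -ffast-math
 */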
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC float simd_fract(float x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC double simd_fract(double x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
* positive and finite, then the two values are exactly equal. */
static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x);
/*! @abstract The "fractional part" of x, lying in the range [0, 1).
* @discussion Deprecated. Use simd_fract(x) instead. */
#define vector_fract simd_fract
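/* Illustrative sketch (not part of the original header): the fractional part
 * always lies in [0, 1), even for negative inputs:
 *
 *     simd_float2 x = { 2.75f, -0.25f };
 *     simd_float2 f = simd_fract(x);   // { 0.75f, 0.75f }
 *     // floor(x) + f recovers x exactly when x is positive and finite.
 */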
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC float simd_step(float edge, float x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC double simd_step(double edge, double x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Use a scalar value for edge if you want to apply the same
* threshold to all lanes. */
static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x);
/*! @abstract 0 if x < edge, and 1 otherwise.
* @discussion Deprecated. Use simd_step(edge, x) instead. */
#define vector_step simd_step
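/* Illustrative sketch (not part of the original header): a scalar edge is
 * broadcast to every lane, so one threshold can be applied to a whole vector:
 *
 *     simd_float4 x = { 0.1f, 0.5f, 0.9f, 1.5f };
 *     simd_float4 s = simd_step(1.0f, x);   // { 0, 0, 0, 1 }
 */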
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion You can use a scalar value for edge0 and edge1 if you want
* to clamp all lanes at the same points. */
static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x);
/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
* @discussion Deprecated. Use simd_smoothstep(edge0, edge1, x) instead. */
#define vector_smoothstep simd_smoothstep
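/* Illustrative sketch (not part of the original header): smoothstep clamps to
 * 0 below edge0 and to 1 above edge1, and applies the usual Hermite polynomial
 * t*t*(3-2*t), with t = (x-edge0)/(edge1-edge0), in between:
 *
 *     simd_float4 x = { -1.0f, 0.25f, 0.75f, 2.0f };
 *     simd_float4 s = simd_smoothstep(0.0f, 1.0f, x);   // { 0, 0.15625, 0.84375, 1 }
 */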
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x);
/*! @abstract Sum of elements in x.
 * @discussion This computation may overflow; especially for 8-bit types you
* may need to convert to a wider type before reducing. */
static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x);
/*! @abstract Sum of elements in x.
 * @discussion Deprecated. Use simd_reduce_add(x) instead. */
#define vector_reduce_add simd_reduce_add
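/* Illustrative sketch (not part of the original header): narrow-type sums wrap
 * on overflow, so widen the lanes first when the sum may not fit. The clang
 * builtin __builtin_convertvector is used here for the lane-wise conversion:
 *
 *     simd_char4 bytes = { 100, 100, 100, 100 };
 *     short sum = simd_reduce_add(__builtin_convertvector(bytes, simd_short4));
 *     // sum == 400, which would not fit in an 8-bit accumulator
 */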
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x);
/*! @abstract Minimum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x);
/*! @abstract Minimum of elements in x.
 * @discussion Deprecated. Use simd_reduce_min(x) instead. */
#define vector_reduce_min simd_reduce_min
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x);
/*! @abstract Maximum of elements in x. */
static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x);
/*! @abstract Maximum of elements in x.
 * @discussion Deprecated. Use simd_reduce_max(x) instead. */
#define vector_reduce_max simd_reduce_max
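/* Illustrative sketch (not part of the original header): the min/max
 * reductions return the least and greatest lane of the argument:
 *
 *     simd_int4 v = { 3, -7, 12, 0 };
 *     int lo = simd_reduce_min(v);   // -7
 *     int hi = simd_reduce_max(v);   //  12
 */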
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_char2 x, simd_char2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_char3 x, simd_char3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_char4 x, simd_char4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_char8 x, simd_char8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_char16 x, simd_char16 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_char32 x, simd_char32 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_char64 x, simd_char64 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar2 x, simd_uchar2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar3 x, simd_uchar3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar4 x, simd_uchar4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar8 x, simd_uchar8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar16 x, simd_uchar16 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar32 x, simd_uchar32 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar64 x, simd_uchar64 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_short2 x, simd_short2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_short3 x, simd_short3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_short4 x, simd_short4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_short8 x, simd_short8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_short16 x, simd_short16 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_short32 x, simd_short32 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort2 x, simd_ushort2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort3 x, simd_ushort3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort4 x, simd_ushort4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort8 x, simd_ushort8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort16 x, simd_ushort16 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort32 x, simd_ushort32 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_int2 x, simd_int2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_int3 x, simd_int3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_int4 x, simd_int4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_int8 x, simd_int8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_int16 x, simd_int16 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uint2 x, simd_uint2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uint3 x, simd_uint3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uint4 x, simd_uint4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uint8 x, simd_uint8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_uint16 x, simd_uint16 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_float2 x, simd_float2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_float3 x, simd_float3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_float4 x, simd_float4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_float8 x, simd_float8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_float16 x, simd_float16 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_long2 x, simd_long2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_long3 x, simd_long3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_long4 x, simd_long4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_long8 x, simd_long8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong2 x, simd_ulong2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong3 x, simd_ulong3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong4 x, simd_ulong4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong8 x, simd_ulong8 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_double2 x, simd_double2 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_double3 x, simd_double3 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_double4 x, simd_double4 y) {
return simd_all(x == y);
}
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y. */
static inline SIMD_CFUNC simd_bool simd_equal(simd_double8 x, simd_double8 y) {
return simd_all(x == y);
}
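/* Illustrative sketch (not part of the original header): lanewise == produces
 * a vector lane mask, whereas simd_equal collapses it to a single boolean:
 *
 *     simd_int4 a = { 1, 2, 3, 4 };
 *     simd_int4 b = { 1, 2, 0, 4 };
 *     simd_int4 mask = (a == b);          // { -1, -1, 0, -1 }
 *     simd_bool same = simd_equal(a, b);  // false: lane 2 differs
 */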
#ifdef __cplusplus
} /* extern "C" */
namespace simd {
/*! @abstract The lanewise absolute value of x. */
template <typename typeN> static SIMD_CPPFUNC typeN abs(const typeN x) { return ::simd_abs(x); }
/*! @abstract The lanewise maximum of x and y. */
template <typename typeN> static SIMD_CPPFUNC typeN max(const typeN x, const typeN y) { return ::simd_max(x,y); }
/*! @abstract The lanewise minimum of x and y. */
template <typename typeN> static SIMD_CPPFUNC typeN min(const typeN x, const typeN y) { return ::simd_min(x,y); }
/*! @abstract x clamped to the interval [min, max]. */
template <typename typeN> static SIMD_CPPFUNC typeN clamp(const typeN x, const typeN min, const typeN max) { return ::simd_clamp(x,min,max); }
/*! @abstract -1 if x < 0, +1 if x > 0, and 0 otherwise. */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN sign(const fptypeN x) { return ::simd_sign(x); }
/*! @abstract Linearly interpolates between x and y, taking the value x when t=0 and y when t=1 */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN mix(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
template <typename fptypeN> static SIMD_CPPFUNC fptypeN lerp(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
/*! @abstract An approximation to 1/x. */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_recip(x); }
/*! @abstract An approximation to 1/sqrt(x). */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_rsqrt(x); }
/*! @abstract The "fracional part" of x, in the range [0,1). */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN fract(const fptypeN x) { return ::simd_fract(x); }
/*! @abstract 0 if x < edge, 1 otherwise. */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN step(const fptypeN edge, const fptypeN x) { return ::simd_step(edge,x); }
/*! @abstract Smoothly interpolates from 0 at edge0 to 1 at edge1. */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN smoothstep(const fptypeN edge0, const fptypeN edge1, const fptypeN x) { return ::simd_smoothstep(edge0,edge1,x); }
/*! @abstract True if and only if each lane of x is equal to the
* corresponding lane of y.
*
* @discussion This isn't operator== because that's already defined by
* the compiler to return a lane mask. */
template <typename fptypeN> static SIMD_CPPFUNC simd_bool equal(const fptypeN x, const fptypeN y) { return ::simd_equal(x, y); }
#if __cpp_decltype_auto
/* If you are targeting an earlier version of the C++ standard that lacks
decltype(auto) support, you may use the C-style simd_reduce_* functions
instead. */
/*! @abstract The sum of the elements in x. May overflow. */
template <typename typeN> static SIMD_CPPFUNC auto reduce_add(typeN x) { return ::simd_reduce_add(x); }
/*! @abstract The least element in x. */
template <typename typeN> static SIMD_CPPFUNC auto reduce_min(typeN x) { return ::simd_reduce_min(x); }
/*! @abstract The greatest element in x. */
template <typename typeN> static SIMD_CPPFUNC auto reduce_max(typeN x) { return ::simd_reduce_max(x); }
#endif
namespace precise {
/*! @abstract An approximation to 1/x. */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_precise_recip(x); }
/*! @abstract An approximation to 1/sqrt(x). */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_precise_rsqrt(x); }
}
namespace fast {
/*! @abstract An approximation to 1/x. */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_fast_recip(x); }
/*! @abstract An approximation to 1/sqrt(x). */
template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_fast_rsqrt(x); }
}
}
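/* Illustrative sketch (not part of the original header): from C++ the same
 * operations are reachable through the simd:: namespace, with the precise and
 * fast variants selected via nested namespaces (reduce_add additionally
 * requires the decltype(auto) support guarded above):
 *
 *     simd_float4 v = { 1.0f, 4.0f, 9.0f, 16.0f };
 *     simd_float4 a = simd::rsqrt(v);           // resolves per -ffast-math
 *     simd_float4 b = simd::precise::rsqrt(v);  // a few ULPs
 *     simd_float4 c = simd::fast::rsqrt(v);     // fastest, reduced accuracy
 *     float sum = simd::reduce_add(v);          // 30.0f
 */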
extern "C" {
#endif /* __cplusplus */
#pragma mark - Implementation
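/* Implementation note: the 2-, 3-, and 4-lane forms below widen their argument
 * to the next hardware-friendly width (the extra lanes hold undefined values),
 * perform the operation at that width, and truncate the result back to the
 * original length. The wider forms dispatch to NEON/SSE/AVX intrinsics where
 * available and otherwise fall back to portable vector-extension code. */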
static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x) {
return simd_make_char2(simd_abs(simd_make_char8_undef(x)));
}
static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x) {
return simd_make_char3(simd_abs(simd_make_char8_undef(x)));
}
static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x) {
return simd_make_char4(simd_abs(simd_make_char8_undef(x)));
}
static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x) {
#if defined __arm__ || defined __arm64__
return vabs_s8(x);
#else
return simd_make_char8(simd_abs(simd_make_char16_undef(x)));
#endif
}
static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x) {
#if defined __arm__ || defined __arm64__
return vabsq_s8(x);
#elif defined __SSE4_1__
return (simd_char16) _mm_abs_epi8((__m128i)x);
#else
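/* Portable fallback: mask is all-ones for negative lanes and zero otherwise,
   so (x ^ mask) - mask conditionally negates the negative lanes
   (two's-complement absolute value). */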
simd_char16 mask = x >> 7; return (x ^ mask) - mask;
#endif
}
static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x) {
#if defined __AVX2__
return _mm256_abs_epi8(x);
#else
return simd_make_char32(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x) {
#if defined __AVX512BW__
return _mm512_abs_epi8(x);
#else
return simd_make_char64(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x) {
return simd_make_short2(simd_abs(simd_make_short4_undef(x)));
}
static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x) {
return simd_make_short3(simd_abs(simd_make_short4_undef(x)));
}
static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x) {
#if defined __arm__ || defined __arm64__
return vabs_s16(x);
#else
return simd_make_short4(simd_abs(simd_make_short8_undef(x)));
#endif
}
static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x) {
#if defined __arm__ || defined __arm64__
return vabsq_s16(x);
#elif defined __SSE4_1__
return (simd_short8) _mm_abs_epi16((__m128i)x);
#else
simd_short8 mask = x >> 15; return (x ^ mask) - mask;
#endif
}
static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x) {
#if defined __AVX2__
return _mm256_abs_epi16(x);
#else
return simd_make_short16(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x) {
#if defined __AVX512BW__
return _mm512_abs_epi16(x);
#else
return simd_make_short32(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x) {
#if defined __arm__ || defined __arm64__
return vabs_s32(x);
#else
return simd_make_int2(simd_abs(simd_make_int4_undef(x)));
#endif
}
static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x) {
return simd_make_int3(simd_abs(simd_make_int4_undef(x)));
}
static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x) {
#if defined __arm__ || defined __arm64__
return vabsq_s32(x);
#elif defined __SSE4_1__
return (simd_int4) _mm_abs_epi32((__m128i)x);
#else
simd_int4 mask = x >> 31; return (x ^ mask) - mask;
#endif
}
static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x) {
#if defined __AVX2__
return _mm256_abs_epi32(x);
#else
return simd_make_int8(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x) {
#if defined __AVX512F__
return _mm512_abs_epi32(x);
#else
return simd_make_int16(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x) {
#if defined __arm64__
return vabsq_s64(x);
#elif defined __AVX512VL__
return (simd_long2) _mm_abs_epi64((__m128i)x);
#else
simd_long2 mask = x >> 63; return (x ^ mask) - mask;
#endif
}
static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x) {
return simd_make_long3(simd_abs(simd_make_long4_undef(x)));
}
static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x) {
#if defined __AVX512VL__
return _mm256_abs_epi64(x);
#else
return simd_make_long4(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x) {
#if defined __AVX512F__
return _mm512_abs_epi64(x);
#else
return simd_make_long8(simd_abs(x.lo), simd_abs(x.hi));
#endif
}
static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x) {
return __tg_fabs(x);
}
static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y) {
return simd_make_char2(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
}
static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y) {
return simd_make_char3(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
}
static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y) {
return simd_make_char4(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y)));
}
static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y) {
#if defined __arm__ || defined __arm64__
return vmin_s8(x, y);
#else
return simd_make_char8(simd_min(simd_make_char16_undef(x), simd_make_char16_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y) {
#if defined __arm__ || defined __arm64__
return vminq_s8(x, y);
#elif defined __SSE4_1__
return (simd_char16) _mm_min_epi8((__m128i)x, (__m128i)y);
#else
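/* Portable fallback: for each lane, select y where y < x, otherwise keep x. */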
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y) {
#if defined __AVX2__
return _mm256_min_epi8(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y) {
#if defined __AVX512BW__
return _mm512_min_epi8(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y) {
return simd_make_uchar2(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
}
static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y) {
return simd_make_uchar3(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
}
static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y) {
return simd_make_uchar4(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
}
static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y) {
#if defined __arm__ || defined __arm64__
return vmin_u8(x, y);
#else
return simd_make_uchar8(simd_min(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y) {
#if defined __arm__ || defined __arm64__
return vminq_u8(x, y);
#elif defined __SSE4_1__
return (simd_uchar16) _mm_min_epu8((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y) {
#if defined __AVX2__
return _mm256_min_epu8(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y) {
#if defined __AVX512BW__
return _mm512_min_epu8(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y) {
return simd_make_short2(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y)));
}
static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y) {
return simd_make_short3(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y)));
}
static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y) {
#if defined __arm__ || defined __arm64__
return vmin_s16(x, y);
#else
return simd_make_short4(simd_min(simd_make_short8_undef(x), simd_make_short8_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y) {
#if defined __arm__ || defined __arm64__
return vminq_s16(x, y);
#elif defined __SSE4_1__
return (simd_short8) _mm_min_epi16((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y) {
#if defined __AVX2__
return _mm256_min_epi16(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y) {
#if defined __AVX512BW__
return _mm512_min_epi16(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y) {
return simd_make_ushort2(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
}
static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y) {
return simd_make_ushort3(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
}
static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y) {
#if defined __arm__ || defined __arm64__
return vmin_u16(x, y);
#else
return simd_make_ushort4(simd_min(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y) {
#if defined __arm__ || defined __arm64__
return vminq_u16(x, y);
#elif defined __SSE4_1__
return (simd_ushort8) _mm_min_epu16((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y) {
#if defined __AVX2__
return _mm256_min_epu16(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y) {
#if defined __AVX512BW__
return _mm512_min_epu16(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y) {
#if defined __arm__ || defined __arm64__
return vmin_s32(x, y);
#else
return simd_make_int2(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y) {
return simd_make_int3(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y)));
}
static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y) {
#if defined __arm__ || defined __arm64__
return vminq_s32(x, y);
#elif defined __SSE4_1__
return (simd_int4) _mm_min_epi32((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y) {
#if defined __AVX2__
return _mm256_min_epi32(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y) {
#if defined __AVX512F__
return _mm512_min_epi32(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y) {
#if defined __arm__ || defined __arm64__
return vmin_u32(x, y);
#else
return simd_make_uint2(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y) {
return simd_make_uint3(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
}
static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y) {
#if defined __arm__ || defined __arm64__
return vminq_u32(x, y);
#elif defined __SSE4_1__
return (simd_uint4) _mm_min_epu32((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y) {
#if defined __AVX2__
return _mm256_min_epu32(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y) {
#if defined __AVX512F__
return _mm512_min_epu32(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC float simd_min(float x, float y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y) {
#if defined __AVX512VL__
return _mm_min_epi64(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y) {
return simd_make_long3(simd_min(simd_make_long4_undef(x), simd_make_long4_undef(y)));
}
static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y) {
#if defined __AVX512VL__
return _mm256_min_epi64(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y) {
#if defined __AVX512F__
return _mm512_min_epi64(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y) {
#if defined __AVX512VL__
return _mm_min_epu64(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y) {
return simd_make_ulong3(simd_min(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y)));
}
static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y) {
#if defined __AVX512VL__
return _mm256_min_epu64(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y) {
#if defined __AVX512F__
return _mm512_min_epu64(x, y);
#else
return simd_bitselect(x, y, y < x);
#endif
}
static inline SIMD_CFUNC double simd_min(double x, double y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y) {
return __tg_fmin(x,y);
}
static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y) {
return simd_make_char2(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
}
static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y) {
return simd_make_char3(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
}
static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y) {
return simd_make_char4(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y)));
}
static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y) {
#if defined __arm__ || defined __arm64__
return vmax_s8(x, y);
#else
return simd_make_char8(simd_max(simd_make_char16_undef(x), simd_make_char16_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y) {
#if defined __arm__ || defined __arm64__
return vmaxq_s8(x, y);
#elif defined __SSE4_1__
return (simd_char16) _mm_max_epi8((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y) {
#if defined __AVX2__
return _mm256_max_epi8(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y) {
#if defined __AVX512BW__
return _mm512_max_epi8(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y) {
return simd_make_uchar2(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
}
static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y) {
return simd_make_uchar3(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
}
static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y) {
return simd_make_uchar4(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y)));
}
static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y) {
#if defined __arm__ || defined __arm64__
return vmax_u8(x, y);
#else
return simd_make_uchar8(simd_max(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y) {
#if defined __arm__ || defined __arm64__
return vmaxq_u8(x, y);
#elif defined __SSE4_1__
return (simd_uchar16) _mm_max_epu8((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y) {
#if defined __AVX2__
return _mm256_max_epu8(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y) {
#if defined __AVX512BW__
return _mm512_max_epu8(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y) {
return simd_make_short2(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y)));
}
static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y) {
return simd_make_short3(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y)));
}
static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y) {
#if defined __arm__ || defined __arm64__
return vmax_s16(x, y);
#else
return simd_make_short4(simd_max(simd_make_short8_undef(x), simd_make_short8_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y) {
#if defined __arm__ || defined __arm64__
return vmaxq_s16(x, y);
#elif defined __SSE4_1__
return (simd_short8) _mm_max_epi16((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y) {
#if defined __AVX2__
return _mm256_max_epi16(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y) {
#if defined __AVX512BW__
return _mm512_max_epi16(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y) {
return simd_make_ushort2(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
}
static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y) {
return simd_make_ushort3(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y)));
}
static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y) {
#if defined __arm__ || defined __arm64__
return vmax_u16(x, y);
#else
return simd_make_ushort4(simd_max(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y) {
#if defined __arm__ || defined __arm64__
return vmaxq_u16(x, y);
#elif defined __SSE4_1__
return (simd_ushort8) _mm_max_epu16((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y) {
#if defined __AVX2__
return _mm256_max_epu16(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y) {
#if defined __AVX512BW__
return _mm512_max_epu16(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y) {
#if defined __arm__ || defined __arm64__
return vmax_s32(x, y);
#else
return simd_make_int2(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y) {
return simd_make_int3(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y)));
}
static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y) {
#if defined __arm__ || defined __arm64__
return vmaxq_s32(x, y);
#elif defined __SSE4_1__
return (simd_int4) _mm_max_epi32((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y) {
#if defined __AVX2__
return _mm256_max_epi32(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y) {
#if defined __AVX512F__
return _mm512_max_epi32(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y) {
#if defined __arm__ || defined __arm64__
return vmax_u32(x, y);
#else
return simd_make_uint2(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
#endif
}
static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y) {
return simd_make_uint3(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y)));
}
static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y) {
#if defined __arm__ || defined __arm64__
return vmaxq_u32(x, y);
#elif defined __SSE4_1__
return (simd_uint4) _mm_max_epu32((__m128i)x, (__m128i)y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y) {
#if defined __AVX2__
return _mm256_max_epu32(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y) {
#if defined __AVX512F__
return _mm512_max_epu32(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC float simd_max(float x, float y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y) {
#if defined __AVX512VL__
return _mm_max_epi64(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y) {
return simd_make_long3(simd_max(simd_make_long4_undef(x), simd_make_long4_undef(y)));
}
static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y) {
#if defined __AVX512VL__
return _mm256_max_epi64(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y) {
#if defined __AVX512F__
return _mm512_max_epi64(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y) {
#if defined __AVX512VL__
return _mm_max_epu64(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y) {
return simd_make_ulong3(simd_max(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y)));
}
static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y) {
#if defined __AVX512VL__
return _mm256_max_epu64(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y) {
#if defined __AVX512F__
return _mm512_max_epu64(x, y);
#else
return simd_bitselect(x, y, x < y);
#endif
}
static inline SIMD_CFUNC double simd_max(double x, double y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y) {
return __tg_fmax(x,y);
}
static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y) {
return __tg_fmax(x,y);
}
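/* simd_clamp is composed as simd_min(simd_max(x, min), max): the lower bound
 * is applied first, so if min > max the result is max. Illustrative example
 * (not part of the interface): clamping the lanes (-2, 0.5, 3, 7) to the
 * range [0, 1] yields (0, 0.5, 1, 1). */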
static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC float simd_clamp(float x, float min, float max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC double simd_clamp(double x, double min, double max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max) {
return simd_min(simd_max(x, min), max);
}
static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max) {
return simd_min(simd_max(x, min), max);
}
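/* simd_sign builds its result from copysign(1, x) and forces lanes where
 * x == 0 or x != x (i.e. NaN) to zero; the vector forms do this with a
 * simd_bitselect on the combined comparison mask. */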
static inline SIMD_CFUNC float simd_sign(float x) {
return (x == 0 | x != x) ? 0 : copysign(1,x);
}
static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC double simd_sign(double x) {
return (x == 0 | x != x) ? 0 : copysign(1,x);
}
static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x) {
return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x);
}
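/* simd_mix evaluates the interpolant as x + t*(y - x), so t == 0 returns x
 * exactly, while t == 1 returns x + (y - x), which may round differently
 * from y. Illustrative example: simd_mix(2.0f, 6.0f, 0.25f) == 3.0f. */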
static inline SIMD_CFUNC float simd_mix(float x, float y, float t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC double simd_mix(double x, double y, double t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t) {
return x + t*(y - x);
}
static inline SIMD_CFUNC float simd_recip(float x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC double simd_recip(double x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x) {
#if __FAST_MATH__
return simd_fast_recip(x);
#else
return simd_precise_recip(x);
#endif
}
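/* simd_fast_recip uses the hardware reciprocal estimate where one exists
 * (_mm_rcp_ps or the AVX-512 rcp14 forms on x86, vrecpe with one vrecps
 * refinement step on NEON); targets without an estimate instruction fall
 * back to simd_precise_recip, as do all of the double-precision overloads. */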
static inline SIMD_CFUNC float simd_fast_recip(float x) {
#if defined __AVX512VL__
simd_float4 x4 = simd_make_float4(x);
return ((simd_float4)_mm_rcp14_ss(x4, x4)).x;
#elif defined __SSE__
return ((simd_float4)_mm_rcp_ss(simd_make_float4(x))).x;
#elif defined __ARM_NEON__
return simd_fast_recip(simd_make_float2_undef(x)).x;
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x) {
#if defined __SSE__
return simd_make_float2(simd_fast_recip(simd_make_float4_undef(x)));
#elif defined __ARM_NEON__
simd_float2 r = vrecpe_f32(x);
return r * vrecps_f32(x, r);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x) {
return simd_make_float3(simd_fast_recip(simd_make_float4_undef(x)));
}
static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x) {
#if defined __AVX512VL__
return _mm_rcp14_ps(x);
#elif defined __SSE__
return _mm_rcp_ps(x);
#elif defined __ARM_NEON__
simd_float4 r = vrecpeq_f32(x);
return r * vrecpsq_f32(x, r);
#else
return simd_precise_recip(x);
#endif
}
static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x) {
#if defined __AVX512VL__
return _mm256_rcp14_ps(x);
#elif defined __AVX__
return _mm256_rcp_ps(x);
#else
return simd_make_float8(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
#endif
}
static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x) {
#if defined __AVX512F__
return _mm512_rcp14_ps(x);
#else
return simd_make_float16(simd_fast_recip(x.lo), simd_fast_recip(x.hi));
#endif
}
static inline SIMD_CFUNC double simd_fast_recip(double x) {
return simd_precise_recip(x);
}
static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x) {
return simd_precise_recip(x);
}
static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x) {
return simd_precise_recip(x);
}
static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x) {
return simd_precise_recip(x);
}
static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x) {
return simd_precise_recip(x);
}
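/* simd_precise_recip refines the fast estimate with one Newton-Raphson step,
 * r' = r*(2 - x*r), which roughly doubles the number of correct bits.
 * Substituting -INFINITY for x when x == 0 keeps the 0*INFINITY product from
 * turning the result into NaN, so the reciprocal of zero comes out infinite.
 * Targets without an estimate instruction simply return 1/x. */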
static inline SIMD_CFUNC float simd_precise_recip(float x) {
#if defined __SSE__
float r = simd_fast_recip(x);
return r*(2 - (x == 0 ? -INFINITY : x)*r);
#elif defined __ARM_NEON__
return simd_precise_recip(simd_make_float2_undef(x)).x;
#else
return 1/x;
#endif
}
static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x) {
#if defined __SSE__
return simd_make_float2(simd_precise_recip(simd_make_float4_undef(x)));
#elif defined __ARM_NEON__
simd_float2 r = simd_fast_recip(x);
return r*vrecps_f32(x, r);
#else
return 1/x;
#endif
}
static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x) {
return simd_make_float3(simd_precise_recip(simd_make_float4_undef(x)));
}
static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x) {
#if defined __SSE__
simd_float4 r = simd_fast_recip(x);
return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
#elif defined __ARM_NEON__
simd_float4 r = simd_fast_recip(x);
return r*vrecpsq_f32(x, r);
#else
return 1/x;
#endif
}
static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x) {
#if defined __AVX__
simd_float8 r = simd_fast_recip(x);
return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
#else
return simd_make_float8(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
#endif
}
static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x) {
#if defined __AVX512F__
simd_float16 r = simd_fast_recip(x);
return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r);
#else
return simd_make_float16(simd_precise_recip(x.lo), simd_precise_recip(x.hi));
#endif
}
static inline SIMD_CFUNC double simd_precise_recip(double x) {
return 1/x;
}
static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x) {
return 1/x;
}
static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x) {
return 1/x;
}
static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x) {
return 1/x;
}
static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x) {
return 1/x;
}
static inline SIMD_CFUNC float simd_rsqrt(float x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC double simd_rsqrt(double x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x) {
#if __FAST_MATH__
return simd_fast_rsqrt(x);
#else
return simd_precise_rsqrt(x);
#endif
}
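/* simd_fast_rsqrt mirrors simd_fast_recip: it uses the hardware
 * reciprocal-square-root estimate (_mm_rsqrt_ps or the AVX-512 rsqrt14 forms
 * on x86, vrsqrte with one vrsqrts refinement step on NEON) and falls back
 * to simd_precise_rsqrt elsewhere, including for all double-precision
 * overloads. */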
static inline SIMD_CFUNC float simd_fast_rsqrt(float x) {
#if defined __AVX512VL__
simd_float4 x4 = simd_make_float4(x);
return ((simd_float4)_mm_rsqrt14_ss(x4, x4)).x;
#elif defined __SSE__
return ((simd_float4)_mm_rsqrt_ss(simd_make_float4(x))).x;
#elif defined __ARM_NEON__
return simd_fast_rsqrt(simd_make_float2_undef(x)).x;
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x) {
#if defined __SSE__
return simd_make_float2(simd_fast_rsqrt(simd_make_float4_undef(x)));
#elif defined __ARM_NEON__
simd_float2 r = vrsqrte_f32(x);
return r * vrsqrts_f32(x, r*r);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x) {
return simd_make_float3(simd_fast_rsqrt(simd_make_float4_undef(x)));
}
static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x) {
#if defined __AVX512VL__
return _mm_rsqrt14_ps(x);
#elif defined __SSE__
return _mm_rsqrt_ps(x);
#elif defined __ARM_NEON__
simd_float4 r = vrsqrteq_f32(x);
return r * vrsqrtsq_f32(x, r*r);
#else
return simd_precise_rsqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x) {
#if defined __AVX512VL__
return _mm256_rsqrt14_ps(x);
#elif defined __AVX__
return _mm256_rsqrt_ps(x);
#else
return simd_make_float8(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
#endif
}
static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x) {
#if defined __AVX512F__
return _mm512_rsqrt14_ps(x);
#else
return simd_make_float16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi));
#endif
}
static inline SIMD_CFUNC double simd_fast_rsqrt(double x) {
return simd_precise_rsqrt(x);
}
static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x) {
return simd_precise_rsqrt(x);
}
static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x) {
return simd_precise_rsqrt(x);
}
static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x) {
return simd_precise_rsqrt(x);
}
static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x) {
return simd_precise_rsqrt(x);
}
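/* simd_precise_rsqrt applies one Newton-Raphson step to the estimate,
 * r' = r*(1.5 - 0.5*x*r*r), i.e. r*(3 - x*r^2)/2. When x == 0 the estimate r
 * is infinite, so those lanes substitute -INFINITY for x to avoid the
 * 0*INFINITY NaN and leave the result at +INFINITY. Targets without an
 * estimate instruction return 1/sqrt(x) directly. */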
static inline SIMD_CFUNC float simd_precise_rsqrt(float x) {
#if defined __SSE__
float r = simd_fast_rsqrt(x);
return r*(1.5f - 0.5f*(r == INFINITY ? -INFINITY : x)*r*r);
#elif defined __ARM_NEON__
return simd_precise_rsqrt(simd_make_float2_undef(x)).x;
#else
return 1/sqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x) {
#if defined __SSE__
return simd_make_float2(simd_precise_rsqrt(simd_make_float4_undef(x)));
#elif defined __ARM_NEON__
simd_float2 r = simd_fast_rsqrt(x);
return r*vrsqrts_f32(x, r*r);
#else
return 1/__tg_sqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x) {
return simd_make_float3(simd_precise_rsqrt(simd_make_float4_undef(x)));
}
static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x) {
#if defined __SSE__
simd_float4 r = simd_fast_rsqrt(x);
return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
#elif defined __ARM_NEON__
simd_float4 r = simd_fast_rsqrt(x);
return r*vrsqrtsq_f32(x, r*r);
#else
return 1/__tg_sqrt(x);
#endif
}
static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x) {
#if defined __AVX__
simd_float8 r = simd_fast_rsqrt(x);
return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
#else
return simd_make_float8(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
#endif
}
static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x) {
#if defined __AVX512F__
simd_float16 r = simd_fast_rsqrt(x);
return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r);
#else
return simd_make_float16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi));
#endif
}
static inline SIMD_CFUNC double simd_precise_rsqrt(double x) {
return 1/sqrt(x);
}
static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x) {
return 1/__tg_sqrt(x);
}
static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x) {
return 1/__tg_sqrt(x);
}
static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x) {
return 1/__tg_sqrt(x);
}
static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x) {
return 1/__tg_sqrt(x);
}
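/* simd_fract clamps x - floor(x) to the largest value strictly less than 1
 * (0x1.fffffep-1f in float, 0x1.fffffffffffffp-1 in double). Without the
 * clamp, a tiny negative input such as -1e-9f would produce exactly 1.0f,
 * because -1e-9f - floor(-1e-9f) rounds up to 1. */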
static inline SIMD_CFUNC float simd_fract(float x) {
return fmin(x - floor(x), 0x1.fffffep-1f);
}
static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
}
static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
}
static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
}
static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
}
static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f);
}
static inline SIMD_CFUNC double simd_fract(double x) {
return fmin(x - floor(x), 0x1.fffffffffffffp-1);
}
static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
}
static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
}
static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
}
static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x) {
return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1);
}
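/* The scalar simd_step forms are written as !(x < edge), so the result is 1
 * whenever the comparison fails; the vector forms select between the
 * splatted constants 1 and 0 using the x < edge lane mask. */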
static inline SIMD_CFUNC float simd_step(float edge, float x) {
return !(x < edge);
}
static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x) {
return simd_bitselect((simd_float2)1, 0, x < edge);
}
static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x) {
return simd_bitselect((simd_float3)1, 0, x < edge);
}
static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x) {
return simd_bitselect((simd_float4)1, 0, x < edge);
}
static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x) {
return simd_bitselect((simd_float8)1, 0, x < edge);
}
static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x) {
return simd_bitselect((simd_float16)1, 0, x < edge);
}
static inline SIMD_CFUNC double simd_step(double edge, double x) {
return !(x < edge);
}
static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x) {
return simd_bitselect((simd_double2)1, 0, x < edge);
}
static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x) {
return simd_bitselect((simd_double3)1, 0, x < edge);
}
static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x) {
return simd_bitselect((simd_double4)1, 0, x < edge);
}
static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x) {
return simd_bitselect((simd_double8)1, 0, x < edge);
}
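/* simd_smoothstep clamps t = (x - edge0)/(edge1 - edge0) to [0, 1] and then
 * evaluates the Hermite polynomial 3t^2 - 2t^3, whose derivative vanishes at
 * both edges. Illustrative example: with x exactly halfway between the
 * edges, t == 0.5 and the result is 0.5. */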
static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x) {
float t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x) {
simd_float2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x) {
simd_float3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x) {
simd_float4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x) {
simd_float8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x) {
simd_float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x) {
double t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x) {
simd_double2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x) {
simd_double3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x) {
simd_double4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x) {
simd_double8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1);
return t*t*(3 - 2*t);
}
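/* The reductions below repeatedly fold a vector in half: an N-element
 * reduction becomes an (N/2)-element reduction of the combined halves, with
 * the 2- and 3-element base cases written out directly. This gives a
 * tree-shaped evaluation order; for floating-point sums, simd_reduce_add on
 * a 4-vector computes (x.x + x.z) + (x.y + x.w), which may round differently
 * from a strict left-to-right sum. */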
static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x) {
return x.x + x.y;
}
static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x) {
return x.x + x.y + x.z;
}
static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x) {
return simd_reduce_add(x.lo + x.hi);
}
static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x) {
return simd_reduce_add(x.lo + x.hi);
}
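/* simd_reduce_min folds the halves with simd_min; on arm64 the 128-bit
 * integer and floating-point types use the across-lanes vminv* intrinsics
 * instead. The 2- and 3-element floating-point base cases use fmin. */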
static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x) {
char t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x) {
#if defined __arm64__
return vminvq_s8(x);
#else
return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x) {
unsigned char t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x) {
#if defined __arm64__
return vminvq_u8(x);
#else
return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x) {
short t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x) {
#if defined __arm64__
return vminvq_s16(x);
#else
return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x) {
unsigned short t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x) {
#if defined __arm64__
return vminvq_u16(x);
#else
return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x) {
int t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x) {
#if defined __arm64__
return vminvq_s32(x);
#else
return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x) {
unsigned int t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x) {
#if defined __arm64__
return vminvq_u32(x);
#else
return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x) {
simd_long1 t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x) {
return x.y < x.x ? x.y : x.x;
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x) {
simd_ulong1 t = x.z < x.x ? x.z : x.x;
return x.y < t ? x.y : t;
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x) {
return fmin(x.x, x.y);
}
static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x) {
return fmin(fmin(x.x, x.z), x.y);
}
static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x) {
#if defined __arm64__
return vminvq_f32(x);
#else
return simd_reduce_min(simd_min(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x) {
#if defined __arm64__
return vminvq_f64(x);
#else
return fmin(x.x, x.y);
#endif
}
static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x) {
return fmin(fmin(x.x, x.z), x.y);
}
static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x) {
return simd_reduce_min(simd_min(x.lo, x.hi));
}
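/* simd_reduce_max mirrors simd_reduce_min: scalar comparisons for two- and
 * three-element vectors, recursive halving through simd_max for wider
 * vectors, and NEON horizontal-maximum intrinsics (vmaxvq_*) on arm64 for
 * vectors that occupy a single 128-bit register.                          */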
static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x) {
char t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x) {
#if defined __arm64__
return vmaxvq_s8(x);
#else
return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x) {
unsigned char t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x) {
#if defined __arm64__
return vmaxvq_u8(x);
#else
return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x) {
short t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x) {
#if defined __arm64__
return vmaxvq_s16(x);
#else
return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x) {
unsigned short t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x) {
#if defined __arm64__
return vmaxvq_u16(x);
#else
return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x) {
int t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x) {
#if defined __arm64__
return vmaxvq_s32(x);
#else
return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x) {
unsigned int t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x) {
#if defined __arm64__
return vmaxvq_u32(x);
#else
return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x) {
simd_long1 t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x) {
return x.y > x.x ? x.y : x.x;
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x) {
simd_ulong1 t = x.z > x.x ? x.z : x.x;
return x.y > t ? x.y : t;
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x) {
return fmax(x.x, x.y);
}
static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x) {
return fmax(fmax(x.x, x.z), x.y);
}
static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x) {
#if defined __arm64__
return vmaxvq_f32(x);
#else
return simd_reduce_max(simd_max(x.lo, x.hi));
#endif
}
static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x) {
#if defined __arm64__
return vmaxvq_f64(x);
#else
return fmax(x.x, x.y);
#endif
}
static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x) {
return fmax(fmax(x.x, x.z), x.y);
}
static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x) {
return simd_reduce_max(simd_max(x.lo, x.hi));
}
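/* Illustrative example (not part of the library interface): the variable
 * names below are hypothetical and only sketch how the reductions above
 * might be used, e.g. to find the extremes and the mean of a small vector.
 *
 *     #include <simd/simd.h>
 *
 *     simd_float4 samples = { 1.0f, -2.5f, 4.0f, 0.5f };
 *     float lo   = simd_reduce_min(samples);        // -2.5f
 *     float hi   = simd_reduce_max(samples);        //  4.0f
 *     float mean = simd_reduce_add(samples) / 4;    //  0.75f
 */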
#ifdef __cplusplus
}
#endif
#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
#endif /* SIMD_COMMON_HEADER */