/*========================================================================= * * Copyright Insight Software Consortium * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *=========================================================================*/ /*========================================================================= * * Portions of this file are subject to the VTK Toolkit Version 3 copyright. * * Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen * * For complete copyright, license and disclaimer of warranty information * please refer to the NOTICE file at the top of the ITK source tree. * *=========================================================================*/ #ifndef itkMathDetail_h #define itkMathDetail_h #include "itkIntTypes.h" #include "itkNumericTraits.h" #ifdef ITK_HAVE_FENV_H // The Sun Studio CC compiler seems to have a bug where if cstdio is // included stdio.h must also be included before fenv.h #include #include // should this be cfenv? #endif /* ITK_HAVE_FENV_H */ #if defined( ITK_HAVE_EMMINTRIN_H ) && !defined( ITK_WRAPPING_PARSER ) #include // sse 2 intrinsics #endif /* ITK_HAVE_EMMINTRIN_H && ! ITK_WRAPPING_PARSER */ // assume no SSE2: #define USE_SSE2_64IMPL 0 #define USE_SSE2_32IMPL 0 // For apple assume sse2 is on for all intel builds, check for 64 and 32 // bit versions #if defined(__APPLE__) && defined( __SSE2__ ) && !defined( ITK_WRAPPING_PARSER ) # if defined( __i386__ ) # undef USE_SSE2_32IMPL # define USE_SSE2_32IMPL 1 # endif # if defined( __x86_64 ) // Turn on the 64 bits implementation # undef USE_SSE2_64IMPL # define USE_SSE2_64IMPL 1 // Turn on also the 32 bits implementation // since it is available in 64 bits versions. # undef USE_SSE2_32IMPL # define USE_SSE2_32IMPL 1 # endif #else // For non-apple (no universal binary possible) just use the // try-compile set ITK_COMPILER_SUPPORTS_SSE2_32 and // ITK_COMPILER_SUPPORTS_SSE2_64 to set values: # if defined(ITK_COMPILER_SUPPORTS_SSE2_32) && !defined( ITK_WRAPPING_PARSER ) # undef USE_SSE2_32IMPL # define USE_SSE2_32IMPL 1 # endif # if defined(ITK_COMPILER_SUPPORTS_SSE2_64) && !defined( ITK_WRAPPING_PARSER ) # undef USE_SSE2_64IMPL # define USE_SSE2_64IMPL 1 # endif #endif // Turn on 32-bit and 64-bit asm impl when using GCC on x86 platform with the // following exception: // GCCXML #if defined( __GNUC__ ) && ( !defined( ITK_WRAPPING_PARSER ) ) && ( defined( __i386__ ) || defined( __i386 ) \ || defined( __x86_64__ ) || defined( __x86_64 ) ) #define GCC_USE_ASM_32IMPL 1 #define GCC_USE_ASM_64IMPL 1 #else #define GCC_USE_ASM_32IMPL 0 #define GCC_USE_ASM_64IMPL 0 #endif // Turn on 32-bit and 64-bit asm impl when using msvc on 32 bits windows #if defined( VCL_VC ) && ( !defined( ITK_WRAPPING_PARSER ) ) && !defined( _WIN64 ) #define VC_USE_ASM_32IMPL 1 #define VC_USE_ASM_64IMPL 1 #else #define VC_USE_ASM_32IMPL 0 #define VC_USE_ASM_64IMPL 0 #endif namespace itk { namespace Math { namespace Detail { // The functions defined in this namespace are not meant to be used directly // and thus do not adhere to the standard backward-compatibility // policy of ITK, as any Detail namespace should be considered private. // Please use the functions from the itk::Math namespace instead //////////////////////////////////////// // Base versions CLANG_PRAGMA_PUSH CLANG_SUPPRESS_Wfloat_equal template< typename TReturn, typename TInput > inline TReturn RoundHalfIntegerToEven_base(TInput x) { if ( NumericTraits< TInput >::IsNonnegative(x) ) { x += static_cast< TInput >( 0.5 ); } else { x -= static_cast< TInput >( 0.5 ); } const TReturn r = static_cast< TReturn >( x ); return ( x != static_cast< TInput >( r ) ) ? r : static_cast< TReturn >( 2 * ( r / 2 ) ); } template< typename TReturn, typename TInput > inline TReturn RoundHalfIntegerUp_base(TInput x) { x += static_cast< TInput >( 0.5 ); const TReturn r = static_cast< TReturn >( x ); return ( NumericTraits< TInput >::IsNonnegative(x) ) ? r : ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) ); } template< typename TReturn, typename TInput > inline TReturn Floor_base(TInput x) { const TReturn r = static_cast< TReturn >( x ); return ( NumericTraits< TInput >::IsNonnegative(x) ) ? r : ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) ); } template< typename TReturn, typename TInput > inline TReturn Ceil_base(TInput x) { const TReturn r = static_cast< TReturn >( x ); return ( NumericTraits< TInput >::IsNegative(x) ) ? r : ( x == static_cast< TInput >( r ) ? r : r + static_cast< TReturn >( 1 ) ); } CLANG_PRAGMA_POP //////////////////////////////////////// // 32 bits versions #if USE_SSE2_32IMPL // sse2 implementation inline int32_t RoundHalfIntegerToEven_32(double x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif return _mm_cvtsd_si32( _mm_set_sd(x) ); } inline int32_t RoundHalfIntegerToEven_32(float x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif return _mm_cvtss_si32( _mm_set_ss(x) ); } #elif GCC_USE_ASM_32IMPL // gcc asm implementation inline int32_t RoundHalfIntegerToEven_32(double x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int32_t r; __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" ); return r; } inline int32_t RoundHalfIntegerToEven_32(float x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int32_t r; __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" ); return r; } #elif VC_USE_ASM_32IMPL // msvc asm implementation inline int32_t RoundHalfIntegerToEven_32(double x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int32_t r; __asm { fld x fistp r } return r; } inline int32_t RoundHalfIntegerToEven_32(float x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int32_t r; __asm { fld x fistp r } return r; } #else // Base implementation inline int32_t RoundHalfIntegerToEven_32(double x) { return RoundHalfIntegerToEven_base< int32_t, double >(x); } inline int32_t RoundHalfIntegerToEven_32(float x) { return RoundHalfIntegerToEven_base< int32_t, float >(x); } #endif #if USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerToEven_32(2 * x + 0.5) >> 1; } inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerToEven_32(2 * x + 0.5f) >> 1; } inline int32_t Floor_32(double x) { return RoundHalfIntegerToEven_32(2 * x - 0.5) >> 1; } inline int32_t Floor_32(float x) { return RoundHalfIntegerToEven_32(2 * x - 0.5f) >> 1; } inline int32_t Ceil_32(double x) { return -( RoundHalfIntegerToEven_32(-0.5 - 2 * x) >> 1 ); } inline int32_t Ceil_32(float x) { return -( RoundHalfIntegerToEven_32(-0.5f - 2 * x) >> 1 ); } #else // Base implementation inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerUp_base< int32_t, double >(x); } inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerUp_base< int32_t, float >(x); } inline int32_t Floor_32(double x) { return Floor_base< int32_t, double >(x); } inline int32_t Floor_32(float x) { return Floor_base< int32_t, float >(x); } inline int32_t Ceil_32(double x) { return Ceil_base< int32_t, double >(x); } inline int32_t Ceil_32(float x) { return Ceil_base< int32_t, float >(x); } #endif // USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL //////////////////////////////////////// // 64 bits versions #if USE_SSE2_64IMPL // sse2 implementation inline int64_t RoundHalfIntegerToEven_64(double x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif return _mm_cvtsd_si64( _mm_set_sd(x) ); } inline int64_t RoundHalfIntegerToEven_64(float x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif return _mm_cvtss_si64( _mm_set_ss(x) ); } #elif GCC_USE_ASM_64IMPL // gcc asm implementation inline int64_t RoundHalfIntegerToEven_64(double x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int64_t r; __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" ); return r; } inline int64_t RoundHalfIntegerToEven_64(float x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int64_t r; __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" ); return r; } #elif VC_USE_ASM_64IMPL // msvc asm implementation inline int64_t RoundHalfIntegerToEven_64(double x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int64_t r; __asm { fld x fistp r } return r; } inline int64_t RoundHalfIntegerToEven_64(float x) { #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); #endif int64_t r; __asm { fld x fistp r } return r; } #else // Base implementation inline int64_t RoundHalfIntegerToEven_64(double x) { return RoundHalfIntegerToEven_base< int64_t, double >(x); } inline int64_t RoundHalfIntegerToEven_64(float x) { return RoundHalfIntegerToEven_base< int64_t, float >(x); } #endif #if USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerToEven_64(2 * x + 0.5) >> 1; } inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerToEven_64(2 * x + 0.5f) >> 1; } inline int64_t Floor_64(double x) { return RoundHalfIntegerToEven_64(2 * x - 0.5) >> 1; } inline int64_t Floor_64(float x) { return RoundHalfIntegerToEven_64(2 * x - 0.5f) >> 1; } inline int64_t Ceil_64(double x) { return -( RoundHalfIntegerToEven_64(-0.5 - 2 * x) >> 1 ); } inline int64_t Ceil_64(float x) { return -( RoundHalfIntegerToEven_64(-0.5f - 2 * x) >> 1 ); } #else // Base implementation inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerUp_base< int64_t, double >(x); } inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerUp_base< int64_t, float >(x); } inline int64_t Floor_64(double x) { return Floor_base< int64_t, double >(x); } inline int64_t Floor_64(float x) { return Floor_base< int64_t, float >(x); } inline int64_t Ceil_64(double x) { return Ceil_base< int64_t, double >(x); } inline int64_t Ceil_64(float x) { return Ceil_base< int64_t, float >(x); } #endif // USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL template struct FloatIEEETraits; template <> struct FloatIEEETraits { typedef int32_t IntType; typedef uint32_t UIntType; }; template <> struct FloatIEEETraits { typedef int64_t IntType; typedef uint64_t UIntType; }; template union FloatIEEE { typedef T FloatType; typedef typename FloatIEEETraits::IntType IntType; typedef typename FloatIEEETraits::UIntType UIntType; FloatType asFloat; IntType asInt; UIntType asUInt; FloatIEEE(FloatType f): asFloat(f) {} FloatIEEE(IntType i): asInt(i) {} bool Sign() const { return (asUInt >> (sizeof(asUInt)*8-1)) != 0; } IntType AsULP() const { return this->Sign()? IntType(~(~UIntType(0) >> 1) - asUInt) : asInt; } }; } // end namespace Detail } // end namespace Math // move to itkConceptChecking? namespace Concept { template< typename T > struct FloatOrDouble; template< > struct FloatOrDouble< float > {}; template< > struct FloatOrDouble< double > {}; } // end namespace Concept } // end namespace itk #undef USE_SSE2_32IMPL #undef GCC_USE_ASM_32IMPL #undef VC_USE_ASM_32IMPL #undef USE_SSE2_64IMPL #undef GCC_USE_ASM_64IMPL #undef VC_USE_ASM_64IMPL #endif // end of itkMathDetail.h