Use a system-installed SLEEF when building ATen.

aten/src/ATen/CMakeLists.txt
  * Declare the imported `sleef` target as UNKNOWN instead of SHARED.
  * Append the path returned by find_library() to ATen_CPU_DEPENDENCY_LIBS.
    When SLEEF_LIBRARY is not set, the plain `sleef` name is appended,
    exactly as before this patch, so the dependency is never dropped.

aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h
aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h
  * Spell the parameter type of map() with SLEEF_CONST / SLEEF_CONST_OLD.
    If SLEEF_CONST is not yet defined at that point, it is defined here
    (`const` when __GNUC__ or __CLANG__ is defined and __INTEL_COMPILER is
    not, empty otherwise) and SLEEF_CONST_OLD aliases it, which reproduces
    the previous `const T (*const vop)(T)` spelling on those compilers.
    If SLEEF_CONST is already defined, SLEEF_CONST_OLD expands to nothing.
    NOTE(review): presumably SLEEF_CONST is provided by newer sleef.h
    releases; confirm against the sleef version being packaged.

diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index f3281d3cb..c8b6ed30e 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -418,7 +418,7 @@ if(NOT MSVC AND NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
     list(APPEND ATen_THIRD_PARTY_INCLUDE ${CMAKE_BINARY_DIR}/include)
     link_directories(${CMAKE_BINARY_DIR}/sleef/lib)
   else()
-    add_library(sleef SHARED IMPORTED)
+    add_library(sleef UNKNOWN IMPORTED)
     find_library(SLEEF_LIBRARY sleef)
     if(NOT SLEEF_LIBRARY)
       message(FATAL_ERROR "Cannot find sleef")
@@ -426,7 +426,14 @@ if(NOT MSVC AND NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
     message("Found sleef: ${SLEEF_LIBRARY}")
     set_target_properties(sleef PROPERTIES IMPORTED_LOCATION "${SLEEF_LIBRARY}")
   endif()
-  list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef)
+  # Link the library located by find_library() by path when there is one;
+  # otherwise keep linking the `sleef` name so this list never silently
+  # loses the dependency.
+  if(SLEEF_LIBRARY)
+    list(APPEND ATen_CPU_DEPENDENCY_LIBS "${SLEEF_LIBRARY}")
+  else()
+    list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef)
+  endif()
 
   set(CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
   set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h b/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h
index f3281d3cb..c8b6ed30e 100644
--- a/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h
+++ b/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h
@@ -21,6 +21,17 @@ inline namespace CPU_CAPABILITY {
 
 #if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER)
 
+#ifndef SLEEF_CONST
+#if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER)
+#define SLEEF_CONST const
+#else
+#define SLEEF_CONST
+#endif
+#define SLEEF_CONST_OLD SLEEF_CONST
+#else
+#define SLEEF_CONST_OLD
+#endif
+
 // bfloat16 conversion
 static inline void cvtbf16_fp32(const __m128i& a, __m256& o) {
   o = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_cvtepu16_epi32(a), 16));
@@ -266,7 +277,7 @@ public:
     }
     return b;
   }
-  Vectorized map(const __m256 (*const vop)(__m256)) const {
+  Vectorized map(SLEEF_CONST __m256 (*SLEEF_CONST_OLD vop)(__m256)) const {
     __m256 lo, hi;
     cvt_to_fp32(values, lo, hi);
     const auto o1 = vop(lo);
diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h b/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h
index f3281d3cb..c8b6ed30e 100644
--- a/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h
+++ b/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h
@@ -18,6 +18,17 @@ inline namespace CPU_CAPABILITY {
 
 #if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER)
 
+#ifndef SLEEF_CONST
+#if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER)
+#define SLEEF_CONST const
+#else
+#define SLEEF_CONST
+#endif
+#define SLEEF_CONST_OLD SLEEF_CONST
+#else
+#define SLEEF_CONST_OLD
+#endif
+
 // bfloat16 conversion
 static inline void cvtbf16_fp32(const __m256i& a, __m512& o) {
   o = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(a), 16));
@@ -345,7 +356,7 @@ public:
   }
   #pragma clang diagnostic push
   #pragma clang diagnostic ignored "-Wignored-qualifiers"
-  Vectorized map(const __m512 (*const vop)(__m512)) const {
+  Vectorized map(SLEEF_CONST __m512 (*SLEEF_CONST_OLD vop)(__m512)) const {
     __m512 lo, hi;
     cvt_to_fp32(values, lo, hi);
     const auto o1 = vop(lo);