85 lines
3.1 KiB
Diff
85 lines
3.1 KiB
Diff
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
|
|
index f3281d3cb..c8b6ed30e 100644
|
|
--- a/aten/src/ATen/CMakeLists.txt
|
|
+++ b/aten/src/ATen/CMakeLists.txt
|
|
@@ -418,7 +418,7 @@ if(NOT MSVC AND NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
|
|
list(APPEND ATen_THIRD_PARTY_INCLUDE ${CMAKE_BINARY_DIR}/include)
|
|
link_directories(${CMAKE_BINARY_DIR}/sleef/lib)
|
|
else()
|
|
- add_library(sleef SHARED IMPORTED)
|
|
+ add_library(sleef UNKNOWN IMPORTED)
|
|
find_library(SLEEF_LIBRARY sleef)
|
|
if(NOT SLEEF_LIBRARY)
|
|
message(FATAL_ERROR "Cannot find sleef")
|
|
@@ -426,7 +426,7 @@ if(NOT MSVC AND NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
|
|
message("Found sleef: ${SLEEF_LIBRARY}")
|
|
set_target_properties(sleef PROPERTIES IMPORTED_LOCATION "${SLEEF_LIBRARY}")
|
|
endif()
|
|
- list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef)
|
|
+ list(APPEND ATen_CPU_DEPENDENCY_LIBS ${SLEEF_LIBRARY})
|
|
|
|
set(CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
|
|
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
|
|
diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h b/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h
|
|
index f3281d3cb..c8b6ed30e 100644
|
|
--- a/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h
|
|
+++ b/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h
|
|
@@ -21,6 +21,17 @@ inline namespace CPU_CAPABILITY {
|
|
|
|
#if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER)
|
|
|
|
+#ifndef SLEEF_CONST
|
|
+#if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER)
|
|
+#define SLEEF_CONST const
|
|
+#else
|
|
+#define SLEEF_CONST
|
|
+#endif
|
|
+#define SLEEF_CONST_OLD SLEEF_CONST
|
|
+#else
|
|
+#define SLEEF_CONST_OLD
|
|
+#endif
|
|
+
|
|
// bfloat16 conversion
|
|
static inline void cvtbf16_fp32(const __m128i& a, __m256& o) {
|
|
o = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_cvtepu16_epi32(a), 16));
|
|
@@ -266,7 +277,7 @@ public:
|
|
}
|
|
return b;
|
|
}
|
|
- Vectorized<T> map(const __m256 (*const vop)(__m256)) const {
|
|
+ Vectorized<T> map(SLEEF_CONST __m256 (*SLEEF_CONST_OLD vop)(__m256)) const {
|
|
__m256 lo, hi;
|
|
cvt_to_fp32<T>(values, lo, hi);
|
|
const auto o1 = vop(lo);
|
|
diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h b/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h
|
|
index f3281d3cb..c8b6ed30e 100644
|
|
--- a/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h
|
|
+++ b/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h
|
|
@@ -18,6 +18,17 @@ inline namespace CPU_CAPABILITY {
|
|
|
|
#if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER)
|
|
|
|
+#ifndef SLEEF_CONST
|
|
+#if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER)
|
|
+#define SLEEF_CONST const
|
|
+#else
|
|
+#define SLEEF_CONST
|
|
+#endif
|
|
+#define SLEEF_CONST_OLD SLEEF_CONST
|
|
+#else
|
|
+#define SLEEF_CONST_OLD
|
|
+#endif
|
|
+
|
|
// bfloat16 conversion
|
|
static inline void cvtbf16_fp32(const __m256i& a, __m512& o) {
|
|
o = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(a), 16));
|
|
@@ -345,7 +356,7 @@ public:
|
|
}
|
|
#pragma clang diagnostic push
|
|
#pragma clang diagnostic ignored "-Wignored-qualifiers"
|
|
- Vectorized<T> map(const __m512 (*const vop)(__m512)) const {
|
|
+ Vectorized<T> map(SLEEF_CONST __m512 (*SLEEF_CONST_OLD vop)(__m512)) const {
|
|
__m512 lo, hi;
|
|
cvt_to_fp32<T>(values, lo, hi);
|
|
const auto o1 = vop(lo);
|