PKGBUILDs/community/blender/D8063-cuda11.diff
2020-06-26 12:42:03 +00:00

106 lines
4.4 KiB
Diff

diff --git a/CMakeLists.txt b/CMakeLists.txt
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -375,7 +375,7 @@
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 CACHE STRING "CUDA architectures to build binaries for")
+set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_80 CACHE STRING "CUDA architectures to build binaries for")
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
unset(PLATFORM_DEFAULT)
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -52,7 +52,7 @@
set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
-set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75 CACHE STRING "" FORCE)
+set(CYCLES_CUDA_BINARIES_ARCH sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61;sm_70;sm_75;sm_80 CACHE STRING "" FORCE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
# platform dependent options
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -313,7 +313,7 @@
set(MAX_MSVC 1910)
elseif(${CUDA_VERSION} EQUAL "9.1")
set(MAX_MSVC 1911)
- elseif(${CUDA_VERSION} LESS "11.0")
+ elseif(${CUDA_VERSION} LESS "12.0")
set(MAX_MSVC 1999)
endif()
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -485,8 +485,12 @@
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
if(${arch} MATCHES "sm_2.")
message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
+ elseif(${arch} MATCHES "sm_30" AND ${CUDA_VERSION} GREATER 109)
+ message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100)
message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
+ elseif(${arch} MATCHES "sm_8." AND ${CUDA_VERSION} LESS 110)
+ message(STATUS "CUDA binaries for ${arch} require CUDA 11.0+, skipped.")
else()
# Compile regular kernel
CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE)
@@ -525,6 +529,11 @@
set(cuda_flags ${cuda_flags}
-D __KERNEL_DEBUG__)
endif()
+ set(OPTIX_TARGET 30)
+ if(${CUDA_VERSION} GREATER 109) #cuda 11
+ set(OPTIX_TARGET 52)
+ endif()
+
if(WITH_CYCLES_CUBIN_COMPILER)
# Needed to find libnvrtc-builtins.so. Can't do it from inside
@@ -536,7 +545,6 @@
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif()
-
add_custom_command(
OUTPUT ${output}
DEPENDS
@@ -551,7 +559,7 @@
${SRC_UTIL_HEADERS}
COMMAND ${CUBIN_CC_ENV}
"$<TARGET_FILE:cycles_cubin_cc>"
- -target 30
+ -target ${OPTIX_TARGET}
-ptx
-i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
${cuda_flags}
@@ -575,7 +583,7 @@
COMMAND
${CUDA_NVCC_EXECUTABLE}
--ptx
- -arch=sm_30
+ -arch=sm_${OPTIX_TARGET}
${cuda_flags}
${input}
WORKING_DIRECTORY
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h
--- a/intern/cycles/kernel/kernels/cuda/kernel_config.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h
@@ -70,8 +70,8 @@
# endif
# define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
-/* 7.x */
-#elif __CUDA_ARCH__ <= 799
+/* 7.x / 8.x */
+#elif __CUDA_ARCH__ <= 899
# define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
# define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
# define CUDA_BLOCK_MAX_THREADS 1024