# cuda.cmake - CUDA architecture selection and nvcc flag setup (skipped entirely unless WITH_GPU is set).

  # Everything below configures nvcc; skip the rest of this file when WITH_GPU
  # is not set.
  if(NOT WITH_GPU)
    return()
  endif()

  # This is to eliminate the CMP0104 warnings from cmake 3.18+.
  # Instead of setting CUDA_ARCHITECTURES, we will set CMAKE_CUDA_FLAGS.
  set(CMAKE_CUDA_ARCHITECTURES OFF)

  # Space-separated compute capabilities used when building for "all" GPUs:
  #   fd_known_gpu_archs   - default list
  #   fd_known_gpu_archs10 - list selected for CUDA 10.x toolkits
  #   fd_known_gpu_archs11 - list selected for CUDA 11.x toolkits
  if(BUILD_ON_JETSON)
    set(fd_known_gpu_archs "53 62 72")
    set(fd_known_gpu_archs10 "53 62 72")
    # The CUDA 11.x list has to exist on Jetson too: the version-specific
    # selection later in this file reads fd_known_gpu_archs11 for every 11.x
    # toolkit, and an undefined value would wipe the Jetson architectures.
    set(fd_known_gpu_archs11 "53 62 72")
  else()
    message("Using New Release Strategy - All Arches Packge")
    set(fd_known_gpu_archs "35 50 52 60 61 70 75 80 86")
    set(fd_known_gpu_archs10 "35 50 52 60 61 70 75")
    set(fd_known_gpu_archs11 "50 60 61 70 75 80")
  endif()
  ######################################################################################
  # detect_installed_gpus(out_variable)
  #
  # Detects the compute capabilities of the GPUs visible on the build machine by
  # compiling and running a tiny CUDA program through "nvcc --run".
  #
  # Arguments:
  #   out_variable - name of the caller-scope variable that receives the result:
  #                  the detected capabilities (e.g. "7.5 8.6") or, when
  #                  detection fails, the value of fd_known_gpu_archs.
  #
  # A successful detection is stored in the internal cache entry
  # CUDA_gpu_detect_output, so the probe is not re-run while that entry is set.
  #
  # Usage:
  #   detect_installed_gpus(out_variable)
  function(detect_installed_gpus out_variable)
    if(NOT CUDA_gpu_detect_output)
      set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

      # The probe prints "<major>.<minor> " for every device and exits non-zero
      # when no device can be enumerated.
      file(
        WRITE ${cufile}
        ""
        "#include \"stdio.h\"\n"
        "#include \"cuda.h\"\n"
        "#include \"cuda_runtime.h\"\n"
        "int main() {\n"
        " int count = 0;\n"
        " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
        " if (count == 0) return -1;\n"
        " for (int device = 0; device < count; ++device) {\n"
        " cudaDeviceProp prop;\n"
        " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
        " printf(\"%d.%d \", prop.major, prop.minor);\n"
        " }\n"
        " return 0;\n"
        "}\n")

      execute_process(
        COMMAND "${CMAKE_CUDA_COMPILER}" "--run" "${cufile}"
        WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
        RESULT_VARIABLE nvcc_res
        OUTPUT_VARIABLE nvcc_out
        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

      if(nvcc_res EQUAL 0)
        # Only keep the last line of nvcc_out: escape literal semicolons, then
        # turn the line breaks into list separators.
        string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
        string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")

        # The probe can exit successfully without printing anything; indexing
        # an empty list is a hard CMake error, so treat that case as a failed
        # detection and fall through to the known-architecture list instead.
        list(LENGTH nvcc_out nvcc_out_lines)
        if(nvcc_out_lines GREATER 0)
          list(GET nvcc_out -1 nvcc_out)
          # Report capability 2.1 in BIN(PTX) form so that it is built from
          # compute_20 PTX.
          string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
          set(CUDA_gpu_detect_output
              "${nvcc_out}"
              CACHE INTERNAL
                    "Returned GPU architetures from detect_installed_gpus tool"
                    FORCE)
        endif()
      endif()
    endif()

    if(NOT CUDA_gpu_detect_output)
      message(
        STATUS
          "Automatic GPU detection failed. Building for all known architectures.")
      set(${out_variable}
          "${fd_known_gpu_archs}"
          PARENT_SCOPE)
    else()
      set(${out_variable}
          "${CUDA_gpu_detect_output}"
          PARENT_SCOPE)
    endif()
  endfunction()
  ########################################################################
  # select_nvcc_arch_flags(out_variable)
  #
  # Translates the CUDA_ARCH_NAME cache option into nvcc "-gencode" flags.
  #
  # Arguments:
  #   out_variable - name of the caller-scope variable that receives the flag
  #                  string. <out_variable>_readable additionally receives a
  #                  short summary such as " sm_80 sm_86".
  #
  # Variables read from the calling scope / cache:
  #   fd_known_gpu_archs          - architectures used for "All" and as the
  #                                 default of CUDA_ARCH_BIN
  #   BUILD_ON_JETSON             - selects the Jetson variant of a generation
  #   CMAKE_CUDA_COMPILER_VERSION - decides which Ampere targets are emitted
  #   NEW_RELEASE_JIT             - when true, emit PTX only (no SASS binaries)
  #   CUDA_ARCH_BIN/CUDA_ARCH_PTX - user lists, honoured in "Manual" mode
  #
  # Fails with FATAL_ERROR when CUDA_ARCH_NAME is not one of the known names.
  #
  # Usage:
  #   select_nvcc_arch_flags(out_variable)
  function(select_nvcc_arch_flags out_variable)
    # List of arch names
    set(archs_names
        "Kepler"
        "Maxwell"
        "Pascal"
        "Volta"
        "Turing"
        "Ampere"
        "All"
        "Manual")
    set(archs_name_default "All")
    list(APPEND archs_names "Auto")

    # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
    set(CUDA_ARCH_NAME
        ${archs_name_default}
        CACHE STRING "Select target NVIDIA GPU achitecture.")
    set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
    mark_as_advanced(CUDA_ARCH_NAME)

    # Verify CUDA_ARCH_NAME with an exact list lookup. A regex match would
    # interpret the user's value as a pattern, letting values such as "Kep.*"
    # slip through and silently end up in the Manual branch below.
    list(FIND archs_names "${CUDA_ARCH_NAME}" arch_name_index)
    if(arch_name_index LESS 0)
      string(REPLACE ";" ", " archs_names "${archs_names}")
      message(
        FATAL_ERROR "Only ${archs_names} architectures names are supported.")
    endif()

    # The manual lists are only exposed as cache options in Manual mode.
    if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
      set(CUDA_ARCH_BIN
          ${fd_known_gpu_archs}
          CACHE
            STRING
            "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
      )
      set(CUDA_ARCH_PTX
          ""
          CACHE
            STRING
            "Specify 'virtual' PTX architectures to build PTX intermediate code for"
      )
      mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
    else()
      unset(CUDA_ARCH_BIN CACHE)
      unset(CUDA_ARCH_PTX CACHE)
    endif()

    # Start from a clean slate so values of the same name in the calling scope
    # cannot leak into the generated flags.
    set(cuda_arch_bin "")
    set(cuda_arch_ptx "")

    if("${CUDA_ARCH_NAME}" STREQUAL "Kepler")
      set(cuda_arch_bin "30 35")
    elseif("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
      if(BUILD_ON_JETSON)
        set(cuda_arch_bin "53")
      else()
        set(cuda_arch_bin "50")
      endif()
    elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
      if(BUILD_ON_JETSON)
        set(cuda_arch_bin "62")
      else()
        set(cuda_arch_bin "60 61")
      endif()
    elseif("${CUDA_ARCH_NAME}" STREQUAL "Volta")
      if(BUILD_ON_JETSON)
        set(cuda_arch_bin "72")
      else()
        set(cuda_arch_bin "70")
      endif()
    elseif("${CUDA_ARCH_NAME}" STREQUAL "Turing")
      set(cuda_arch_bin "75")
    elseif("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
      # CUDA 11.0 only targets sm_80; every later toolkit (including 12.x,
      # which previously produced no flags at all) also targets sm_86.
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.1") # CUDA 11.0
        set(cuda_arch_bin "80")
      else() # CUDA 11.1+
        set(cuda_arch_bin "80 86")
      endif()
    elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
      set(cuda_arch_bin "${fd_known_gpu_archs}")
    elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
      message(
        STATUS
          "WARNING: This is just a warning for publishing release.\n"
          "You are building GPU version without supporting different architectures.\n"
          "So the wheel package may fail on other GPU architectures.\n"
          "You can add -DCUDA_ARCH_NAME=All in cmake command\n"
          "to get a full wheel package to resolve this warning.\n"
          "While, this version will still work on local GPU architecture.")
      detect_installed_gpus(cuda_arch_bin)
    else() # "Manual" - the only name left after the validation above
      set(cuda_arch_bin "${CUDA_ARCH_BIN}")
      # Honour the user's PTX list; it is offered as a cache option above and
      # would otherwise be ignored.
      set(cuda_arch_ptx "${CUDA_ARCH_PTX}")
    endif()

    # JIT release: ship PTX for every requested architecture and no binaries.
    # The separator keeps the last PTX entry and the first BIN entry apart.
    if(NEW_RELEASE_JIT)
      set(cuda_arch_ptx "${cuda_arch_ptx} ${cuda_arch_bin}")
      set(cuda_arch_bin "")
    endif()

    # remove dots and convert to lists
    string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
    string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
    string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
    string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")
    list(REMOVE_DUPLICATES cuda_arch_bin)
    list(REMOVE_DUPLICATES cuda_arch_ptx)

    set(nvcc_flags "")
    set(nvcc_archs_readable "")

    # Tell NVCC to add binaries for the specified GPUs
    foreach(arch ${cuda_arch_bin})
      if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
        # User explicitly specified PTX for the concrete BIN
        string(APPEND nvcc_flags
               " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
        string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
      else()
        # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
        string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
        string(APPEND nvcc_archs_readable " sm_${arch}")
      endif()
    endforeach()

    # Tell NVCC to add PTX intermediate code for the specified architectures
    foreach(arch ${cuda_arch_ptx})
      string(APPEND nvcc_flags
             " -gencode arch=compute_${arch},code=compute_${arch}")
      string(APPEND nvcc_archs_readable " compute_${arch}")
    endforeach()

    string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
    set(${out_variable}
        "${nvcc_flags}"
        PARENT_SCOPE)
    set(${out_variable}_readable
        "${nvcc_archs_readable}"
        PARENT_SCOPE)
  endfunction()
  message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})

  # Every decision below depends on the toolkit version; fail with a readable
  # message instead of a condition-parsing error when it is not available.
  if("${CMAKE_CUDA_COMPILER_VERSION}" STREQUAL "")
    message(
      FATAL_ERROR
        "CMAKE_CUDA_COMPILER_VERSION is empty; enable the CUDA language "
        "(project()/enable_language()) before including this file.")
  endif()

  # Pick the list of architectures that the detected toolkit can build for.
  # VERSION_LESS compares dotted versions component by component.
  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.0") # CUDA 10.x
    set(fd_known_gpu_archs "${fd_known_gpu_archs10}")
  elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.0") # CUDA 11.x
    # Keep the current list if no CUDA 11 specific list is defined, rather
    # than replacing it with an empty value.
    if(DEFINED fd_known_gpu_archs11)
      set(fd_known_gpu_archs "${fd_known_gpu_archs11}")
    endif()
    if(NOT "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.2") # CUDA 11.2+
      set(fd_known_gpu_archs "${fd_known_gpu_archs} 86")
    endif()
  else() # CUDA 12+
    # CUDA 12 removed the Kepler (sm_3x) targets, so drop them from the list;
    # nvcc rejects them as unsupported architectures.
    string(REGEX MATCHALL "[^ ;]+" fd_known_gpu_arch_list
                 "${fd_known_gpu_archs}")
    list(FILTER fd_known_gpu_arch_list EXCLUDE REGEX "^3[0-9]$")
    string(REPLACE ";" " " fd_known_gpu_archs "${fd_known_gpu_arch_list}")
    unset(fd_known_gpu_arch_list)
  endif()

  # Extra definitions / warning suppression applied to every pre-12 toolkit.
  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.0")
    string(APPEND CMAKE_CUDA_FLAGS " -D_MWAITXINTRIN_H_INCLUDED")
    string(APPEND CMAKE_CUDA_FLAGS " -D__STRICT_ANSI__")
    string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")
  endif()

  # setting nvcc arch flags
  select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
  string(APPEND CMAKE_CUDA_FLAGS " ${NVCC_FLAGS_EXTRA}")
  message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")

  # Do not forward the host compiler flags to nvcc.
  set(CUDA_PROPAGATE_HOST_FLAGS OFF)

  # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
  # So, don't set these flags here.

  # Default the CUDA language standard to 11 unless the user already chose one.
  if(NOT DEFINED CMAKE_CUDA_STANDARD)
    set(CMAKE_CUDA_STANDARD 11)
  else()
    message(WARNING "Detected custom CMAKE_CUDA_STANDARD is using: ${CMAKE_CUDA_STANDARD}")
  endif()

  # (Note) On Windows, if /W[1-4] is deleted, /W1 is added by default and
  # conflicts with -w, so replace /W[1-4] with /W0 instead.
  if(WIN32)
    string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
  endif()

  # in cuda9, suppress cuda warning on eigen
  string(APPEND CMAKE_CUDA_FLAGS " -w")
  # Set :expt-relaxed-constexpr to suppress Eigen warnings
  string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr")
  # Set :expt-extended-lambda to enable HOSTDEVICE annotation on lambdas
  string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda")

  if(WIN32)
    # Host-compiler options: silence warnings 4244/4267/4819 and allow big
    # object files.
    string(APPEND CMAKE_CUDA_FLAGS
           " -Xcompiler \"/wd4244 /wd4267 /wd4819 \"")
    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /bigobj")

    # With a static CRT, switch every -MD occurrence in the CUDA flag
    # variables to -MT.
    if(MSVC_STATIC_CRT)
      foreach(flag_var
              CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
              CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
        if(${flag_var} MATCHES "-MD")
          string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}")
        endif()
      endforeach()
    endif()
  endif()

  mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
  mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)