cuda.cmake 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. if(NOT WITH_GPU)
  2. return()
  3. endif()
  4. # This is to eliminate the CMP0104 warnings from cmake 3.18+.
  5. # Instead of setting CUDA_ARCHITECTURES, we will set CMAKE_CUDA_FLAGS.
  6. set(CMAKE_CUDA_ARCHITECTURES OFF)
  7. if(BUILD_ON_JETSON)
  8. set(fd_known_gpu_archs "53 62 72")
  9. set(fd_known_gpu_archs10 "53 62 72")
  10. else()
  11. message("Using New Release Strategy - All Arches Package")
  12. set(fd_known_gpu_archs "35 50 52 60 61 70 75 80 86")
  13. set(fd_known_gpu_archs10 "35 50 52 60 61 70 75")
  14. set(fd_known_gpu_archs11 "50 60 61 70 75 80")
  15. set(fd_known_gpu_archs12 "50 60 61 70 75 80")
  16. endif()
  17. ######################################################################################
  18. # A function for automatic detection of GPUs installed (if autodetection is enabled)
  19. # Usage:
  20. # detect_installed_gpus(out_variable)
  21. function(detect_installed_gpus out_variable)
  22. if(NOT CUDA_gpu_detect_output)
  23. set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
  24. file(
  25. WRITE ${cufile}
  26. ""
  27. "#include \"stdio.h\"\n"
  28. "#include \"cuda.h\"\n"
  29. "#include \"cuda_runtime.h\"\n"
  30. "int main() {\n"
  31. " int count = 0;\n"
  32. " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
  33. " if (count == 0) return -1;\n"
  34. " for (int device = 0; device < count; ++device) {\n"
  35. " cudaDeviceProp prop;\n"
  36. " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
  37. " printf(\"%d.%d \", prop.major, prop.minor);\n"
  38. " }\n"
  39. " return 0;\n"
  40. "}\n")
  41. execute_process(
  42. COMMAND "${CMAKE_CUDA_COMPILER}" "--run" "${cufile}"
  43. WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
  44. RESULT_VARIABLE nvcc_res
  45. OUTPUT_VARIABLE nvcc_out
  46. ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
  47. if(nvcc_res EQUAL 0)
  48. # only keep the last line of nvcc_out
  49. string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
  50. string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
  51. list(GET nvcc_out -1 nvcc_out)
  52. string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
  53. set(CUDA_gpu_detect_output
  54. ${nvcc_out}
  55. CACHE INTERNAL
  56. "Returned GPU architectures from detect_installed_gpus tool"
  57. FORCE)
  58. endif()
  59. endif()
  60. if(NOT CUDA_gpu_detect_output)
  61. message(
  62. STATUS
  63. "Automatic GPU detection failed. Building for all known architectures.")
  64. set(${out_variable}
  65. ${fd_known_gpu_archs}
  66. PARENT_SCOPE)
  67. else()
  68. set(${out_variable}
  69. ${CUDA_gpu_detect_output}
  70. PARENT_SCOPE)
  71. endif()
  72. endfunction()
  73. ########################################################################
  74. # Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
  75. # Usage:
  76. # select_nvcc_arch_flags(out_variable)
  77. function(select_nvcc_arch_flags out_variable)
  78. # List of arch names
  79. set(archs_names
  80. "Kepler"
  81. "Maxwell"
  82. "Pascal"
  83. "Volta"
  84. "Turing"
  85. "Ampere"
  86. "All"
  87. "Manual")
  88. set(archs_name_default "All")
  89. list(APPEND archs_names "Auto")
  90. # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
  91. set(CUDA_ARCH_NAME
  92. ${archs_name_default}
  93. CACHE STRING "Select target NVIDIA GPU architecture.")
  94. set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
  95. mark_as_advanced(CUDA_ARCH_NAME)
  96. # verify CUDA_ARCH_NAME value
  97. if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
  98. string(REPLACE ";" ", " archs_names "${archs_names}")
  99. message(
  100. FATAL_ERROR "Only ${archs_names} architectures names are supported.")
  101. endif()
  102. if(${CUDA_ARCH_NAME} STREQUAL "Manual")
  103. set(CUDA_ARCH_BIN
  104. ${fd_known_gpu_archs}
  105. CACHE
  106. STRING
  107. "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
  108. )
  109. set(CUDA_ARCH_PTX
  110. ""
  111. CACHE
  112. STRING
  113. "Specify 'virtual' PTX architectures to build PTX intermediate code for"
  114. )
  115. mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  116. else()
  117. unset(CUDA_ARCH_BIN CACHE)
  118. unset(CUDA_ARCH_PTX CACHE)
  119. endif()
  120. if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
  121. set(cuda_arch_bin "30 35")
  122. elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
  123. if(BUILD_ON_JETSON)
  124. set(cuda_arch_bin "53")
  125. else()
  126. set(cuda_arch_bin "50")
  127. endif()
  128. elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
  129. if(BUILD_ON_JETSON)
  130. set(cuda_arch_bin "62")
  131. else()
  132. set(cuda_arch_bin "60 61")
  133. endif()
  134. elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
  135. if(BUILD_ON_JETSON)
  136. set(cuda_arch_bin "72")
  137. else()
  138. set(cuda_arch_bin "70")
  139. endif()
  140. elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
  141. set(cuda_arch_bin "75")
  142. elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
  143. if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0
  144. set(cuda_arch_bin "80")
  145. elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+
  146. set(cuda_arch_bin "80 86")
  147. elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 13.0) # CUDA 12.1+
  148. set(cuda_arch_bin "80 86")
  149. endif()
  150. elseif(${CUDA_ARCH_NAME} STREQUAL "All")
  151. set(cuda_arch_bin ${fd_known_gpu_archs})
  152. elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
  153. message(
  154. STATUS
  155. "WARNING: This is just a warning for publishing release.
  156. You are building GPU version without supporting different architectures.
  157. So the wheel package may fail on other GPU architectures.
  158. You can add -DCUDA_ARCH_NAME=All in cmake command
  159. to get a full wheel package to resolve this warning.
  160. While, this version will still work on local GPU architecture.")
  161. detect_installed_gpus(cuda_arch_bin)
  162. else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
  163. set(cuda_arch_bin ${CUDA_ARCH_BIN})
  164. endif()
  165. if(NEW_RELEASE_JIT)
  166. set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}")
  167. set(cuda_arch_bin "")
  168. endif()
  169. # remove dots and convert to lists
  170. string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  171. string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  172. string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  173. string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")
  174. list(REMOVE_DUPLICATES cuda_arch_bin)
  175. list(REMOVE_DUPLICATES cuda_arch_ptx)
  176. set(nvcc_flags "")
  177. set(nvcc_archs_readable "")
  178. # Tell NVCC to add binaries for the specified GPUs
  179. foreach(arch ${cuda_arch_bin})
  180. if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
  181. # User explicitly specified PTX for the concrete BIN
  182. string(APPEND nvcc_flags
  183. " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
  184. string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
  185. else()
  186. # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
  187. string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
  188. string(APPEND nvcc_archs_readable " sm_${arch}")
  189. endif()
  190. endforeach()
  191. # Tell NVCC to add PTX intermediate code for the specified architectures
  192. foreach(arch ${cuda_arch_ptx})
  193. string(APPEND nvcc_flags
  194. " -gencode arch=compute_${arch},code=compute_${arch}")
  195. string(APPEND nvcc_archs_readable " compute_${arch}")
  196. endforeach()
  197. string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  198. set(${out_variable}
  199. ${nvcc_flags}
  200. PARENT_SCOPE)
  201. set(${out_variable}_readable
  202. ${nvcc_archs_readable}
  203. PARENT_SCOPE)
  204. endfunction()
  205. message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})
  206. if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
  207. set(fd_known_gpu_archs ${fd_known_gpu_archs10})
  208. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
  209. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
  210. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
  211. elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.2) # CUDA 11.0/11.1
  212. set(fd_known_gpu_archs ${fd_known_gpu_archs11})
  213. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
  214. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
  215. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
  216. elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+
  217. set(fd_known_gpu_archs "${fd_known_gpu_archs11} 86")
  218. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
  219. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
  220. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
  221. elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 13.0) # CUDA 12.x
  222. set(fd_known_gpu_archs "${fd_known_gpu_archs12} 86")
  223. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
  224. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
  225. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
  226. endif()
  227. # setting nvcc arch flags
  228. select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
  229. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
  230. message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")
  231. # Set C++14 support
  232. set(CUDA_PROPAGATE_HOST_FLAGS OFF)
  233. # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
  234. # So, don't set these flags here.
  235. if(NOT DEFINED CMAKE_CUDA_STANDARD)
  236. set(CMAKE_CUDA_STANDARD 11)
  237. else()
  238. message(WARNING "Detected custom CMAKE_CUDA_STANDARD is using: ${CMAKE_CUDA_STANDARD}")
  239. endif()
  240. # (Note) For windows, if delete /W[1-4], /W1 will be added defaultly and conflict with -w
  241. # So replace /W[1-4] with /W0
  242. if(WIN32)
  243. string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
  244. endif()
  245. # in cuda9, suppress cuda warning on eigen
  246. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w")
  247. # Set :expt-relaxed-constexpr to suppress Eigen warnings
  248. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
  249. # Set :expt-extended-lambda to enable HOSTDEVICE annotation on lambdas
  250. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
  251. if(WIN32)
  252. set(CMAKE_CUDA_FLAGS
  253. "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"")
  254. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj")
  255. if(MSVC_STATIC_CRT)
  256. foreach(flag_var
  257. CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
  258. CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
  259. if(${flag_var} MATCHES "-MD")
  260. string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}")
  261. endif()
  262. endforeach()
  263. endif()
  264. endif()
  265. mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
  266. mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)