fast_tokenizer.cmake 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. include(ExternalProject)
  # ---------------------------------------------------------------------------
  # Directory layout for the prebuilt fast_tokenizer package.
  #
  # Every path below is derived from THIRD_PARTY_PATH, which this file does not
  # set; it must already be defined by the including project.
  # ---------------------------------------------------------------------------
  set(FASTTOKENIZER_PROJECT "extern_fast_tokenizer")
  set(FASTTOKENIZER_PREFIX_DIR "${THIRD_PARTY_PATH}/fast_tokenizer")
  set(FASTTOKENIZER_SOURCE_DIR
      "${THIRD_PARTY_PATH}/fast_tokenizer/src/${FASTTOKENIZER_PROJECT}")
  set(FASTTOKENIZER_INSTALL_DIR "${THIRD_PARTY_PATH}/install/fast_tokenizer")

  # Include directories (a two-element list): the package's own headers and
  # the headers of the third-party code shipped inside the package.
  # FORCE rewrites the cache entry on every configure run.
  set(FASTTOKENIZER_INC_DIR
      "${FASTTOKENIZER_INSTALL_DIR}/include"
      "${FASTTOKENIZER_INSTALL_DIR}/third_party/include"
      CACHE PATH "fast_tokenizer include directory." FORCE)

  # Library directories. Both values intentionally keep their trailing '/',
  # exactly as before, so that any existing user of these cache entries sees
  # the same strings.
  set(FASTTOKENIZER_LIB_DIR
      "${FASTTOKENIZER_INSTALL_DIR}/lib/"
      CACHE PATH "fast_tokenizer lib directory." FORCE)
  set(FASTTOKENIZER_THIRD_LIB_DIR
      "${FASTTOKENIZER_INSTALL_DIR}/third_party/lib/"
      CACHE PATH "fast_tokenizer third-party lib directory." FORCE)

  # Add the library directory to the build-tree RPATH list. list(APPEND) is
  # used instead of re-setting the variable from its own expansion so that an
  # initially empty CMAKE_BUILD_RPATH does not end up with an empty leading
  # list element.
  list(APPEND CMAKE_BUILD_RPATH "${FASTTOKENIZER_LIB_DIR}")

  # Directory-scoped on purpose: the headers are made visible to every target
  # defined after this file is included. The variable is left unquoted so the
  # two-element list is passed as two separate directories.
  include_directories(${FASTTOKENIZER_INC_DIR})
  # ---------------------------------------------------------------------------
  # Select the platform-specific library file that consumers link against.
  #
  #   Windows : core_tokenizers.lib, plus the ICU libraries icudt.lib and
  #             icuuc.lib from the package's third_party/lib directory
  #   macOS   : libcore_tokenizers.dylib
  #   other   : libcore_tokenizers.so
  #
  # FASTTOKENIZER_LIB_DIR / FASTTOKENIZER_THIRD_LIB_DIR already end with '/',
  # so the resulting paths contain a doubled separator. They are kept
  # byte-identical here because FASTTOKENIZER_COMPILE_LIB is a cache entry
  # that other files may compare or reuse.
  # ---------------------------------------------------------------------------
  if(WIN32)
    set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/core_tokenizers.lib"
        CACHE FILEPATH "fast_tokenizer compile library." FORCE)
    # ICU libraries are only defined on Windows; they are plain (non-cache)
    # variables.
    set(ICUDT_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icudt.lib")
    set(ICUUC_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icuuc.lib")
  elseif(APPLE)
    set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib"
        CACHE FILEPATH "fast_tokenizer compile library." FORCE)
  else()
    set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so"
        CACHE FILEPATH "fast_tokenizer compile library." FORCE)
  endif()

  # Informational only, so report it as a STATUS line rather than as a
  # mode-less message (which CMake emits on stderr as an important notice).
  message(STATUS "FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
  # ---------------------------------------------------------------------------
  # Compose the download URL of the prebuilt archive.
  #
  # The archive name has the shape
  #   fast_tokenizer-<platform>-<version>.<ext>
  # where <platform> and <ext> depend on the OS / architecture checks below.
  # ---------------------------------------------------------------------------
  set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
  set(FASTTOKENIZER_VERSION "1.0.2")

  if(WIN32)
    # Windows packages are zip archives; CMAKE_CL_64 picks the x64 build.
    set(_fasttokenizer_ext "zip")
    if(CMAKE_CL_64)
      set(_fasttokenizer_platform "win-x64")
    else()
      set(_fasttokenizer_platform "win-x86")
    endif()
  elseif(APPLE)
    # CURRENT_OSX_ARCH is not set in this file; it is expected to come from
    # the including project.
    set(_fasttokenizer_ext "tgz")
    if(CURRENT_OSX_ARCH MATCHES "arm64")
      set(_fasttokenizer_platform "osx-arm64")
    else()
      set(_fasttokenizer_platform "osx-x86_64")
    endif()
  else()
    # Remaining platforms use the Linux packages; the choice is based on the
    # *host* processor.
    set(_fasttokenizer_ext "tgz")
    if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
      set(_fasttokenizer_platform "linux-aarch64")
    else()
      set(_fasttokenizer_platform "linux-x64")
    endif()
  endif()

  set(FASTTOKENIZER_FILE
      "fast_tokenizer-${_fasttokenizer_platform}-${FASTTOKENIZER_VERSION}.${_fasttokenizer_ext}")
  set(FASTTOKENIZER_URL "${FASTTOKENIZER_URL_BASE}${FASTTOKENIZER_FILE}")

  # The helper variables are implementation details; do not leak them into
  # the including scope.
  unset(_fasttokenizer_platform)
  unset(_fasttokenizer_ext)
  # ---------------------------------------------------------------------------
  # Download and stage the prebuilt package.
  #
  # The archive is binary-only, so the configure, build and update steps are
  # disabled. The "install" step copies the extracted tree
  # (FASTTOKENIZER_SOURCE_DIR, which matches ExternalProject's default
  # <PREFIX>/src/<name> location) into FASTTOKENIZER_INSTALL_DIR.
  #
  # Every library file that an imported target in this file points at must be
  # declared as a byproduct; otherwise the Ninja generator has no rule that
  # produces the file and refuses to build. On Windows that includes the two
  # ICU import libraries in addition to the core tokenizer library.
  # EXTERNAL_PROJECT_LOG_ARGS is not defined in this file and is expected to
  # be provided by the including project.
  # ---------------------------------------------------------------------------
  set(_fasttokenizer_byproducts "${FASTTOKENIZER_COMPILE_LIB}")
  if(WIN32)
    list(APPEND _fasttokenizer_byproducts "${ICUDT_LIB}" "${ICUUC_LIB}")
  endif()

  ExternalProject_Add(
    ${FASTTOKENIZER_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    URL "${FASTTOKENIZER_URL}"
    PREFIX "${FASTTOKENIZER_PREFIX_DIR}"
    DOWNLOAD_NO_PROGRESS 1
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    UPDATE_COMMAND ""
    INSTALL_COMMAND
      "${CMAKE_COMMAND}" -E copy_directory
      "${FASTTOKENIZER_SOURCE_DIR}" "${FASTTOKENIZER_INSTALL_DIR}"
    BUILD_BYPRODUCTS ${_fasttokenizer_byproducts})

  unset(_fasttokenizer_byproducts)
  # ---------------------------------------------------------------------------
  # Imported targets for the staged libraries.
  #
  # Each target is GLOBAL, depends on the ExternalProject target so the files
  # are staged before anything links against them, and is appended to
  # DEPEND_LIBS (a list maintained outside this file).
  #
  # NOTE(review): the targets are declared STATIC, while the non-Windows values
  # of FASTTOKENIZER_COMPILE_LIB assigned earlier in this file end in
  # .so / .dylib -- confirm the library type is intentional.
  # ---------------------------------------------------------------------------
  add_library(fast_tokenizer STATIC IMPORTED GLOBAL)
  set_target_properties(fast_tokenizer PROPERTIES
    IMPORTED_LOCATION "${FASTTOKENIZER_COMPILE_LIB}")
  add_dependencies(fast_tokenizer ${FASTTOKENIZER_PROJECT})
  list(APPEND DEPEND_LIBS fast_tokenizer)

  if(WIN32)
    # The ICU libraries are only defined for Windows. For each target name the
    # location is read from the matching upper-case variable
    # (icudt -> ICUDT_LIB, icuuc -> ICUUC_LIB).
    foreach(_icu_target IN ITEMS icudt icuuc)
      string(TOUPPER "${_icu_target}_LIB" _icu_location_var)
      add_library(${_icu_target} STATIC IMPORTED GLOBAL)
      set_target_properties(${_icu_target} PROPERTIES
        IMPORTED_LOCATION "${${_icu_location_var}}")
      add_dependencies(${_icu_target} ${FASTTOKENIZER_PROJECT})
      list(APPEND DEPEND_LIBS ${_icu_target})
    endforeach()
    unset(_icu_target)
    unset(_icu_location_var)
  endif()