|
- # CMake file `unity_build` is used to handle Unity Build compilation.
- # NOTE(review): presumably this module provides `compose_unity_target_sources`
- # and `finish_unity_target`, which are called below but not defined in this
- # file - confirm.
- include(unity_build)
- # Start with an unset list; `op_library` prepends every `<target>.part.cu` file
- # it discovers to PART_CUDA_KERNEL_FILES in its caller's scope.
- set(PART_CUDA_KERNEL_FILES)
-
- # Find the op name passed as first argument to a registration macro, e.g.
- # REGISTER_OPERATOR(op_name, ...) or REGISTER_OP_CPU_KERNEL(op_name, ...).
- #
- # Arguments:
- #   FILENAME - source file whose content is scanned.
- #   PATTERN  - registration macro name; it is spliced into a regular
- #              expression as-is, so it must not contain regex metacharacters.
- #   OUTPUT   - name of the variable, in the caller's scope, that receives the
- #              op name of the first match. When nothing matches the variable
- #              is unset in the caller's scope (it then expands to "").
- #
- # Only names made of lowercase letters, digits and underscores are recognised.
- function(find_register FILENAME PATTERN OUTPUT)
-   # Quote the path so that a file name containing ';' or blanks is not split
-   # into several arguments.
-   file(READ "${FILENAME}" CONTENT)
-
-   # Matches `PATTERN(`, optional blanks/newlines, the op name and the comma
-   # that ends the first macro argument.
-   string(REGEX MATCH "${PATTERN}\\([ \t\r\n]*[a-z0-9_]*," register "${CONTENT}")
-   if(NOT register STREQUAL "")
-     # Strip everything except the op name: the macro prefix, the trailing
-     # comma and finally any blank characters.
-     string(REPLACE "${PATTERN}(" "" register "${register}")
-     string(REPLACE "," "" register "${register}")
-     # [ \t\r\n]+ is used for blank characters.
-     # Here we use '+' instead of '*' since it is a REPLACE operation.
-     string(REGEX REPLACE "[ \t\r\n]+" "" register "${register}")
-   endif()
-
-   # Left unquoted on purpose: an empty result unsets OUTPUT in the caller,
-   # exactly as before.
-   set(${OUTPUT}
-       ${register}
-       PARENT_SCOPE)
- endfunction()
-
- # Create the library for operator TARGET and emit its pybind registration.
- #
- # Usage: op_library(<target> [UNITY] [SRCS <file>...] [DEPS <target>...])
- #
- #   UNITY - build the operator as part of the directory's unity target. Only
- #           honoured when WITH_UNITY_BUILD is on; <target> then becomes an
- #           ALIAS of that unity target.
- #   SRCS  - explicit source list, classified by file suffix. When omitted, the
- #           sources are discovered in CMAKE_CURRENT_SOURCE_DIR from the
- #           conventional file names derived from <target>.
- #   DEPS  - extra dependencies, passed in front of the common op dependencies.
- #
- # The interface is the same as cc_library, but GPU/CPU/accelerator sources are
- # split and handed to nv_library, hip_library, xpu_library or cc_library
- # depending on the WITH_* options.
- #
- # Side effects: prepends to the OP_LIBRARY cache entry, updates DEPS_OPS and
- # PART_CUDA_KERNEL_FILES in the caller's scope, and appends USE_OP* lines to
- # ${pybind_file}, which is read from the enclosing scope and not set here.
- function(op_library TARGET)
-   set(cc_srcs)
-   set(cu_srcs)
-   set(hip_srcs)
-   set(cu_cc_srcs)
-   set(hip_cc_srcs)
-   set(xpu_cc_srcs)
-   set(xpu_kp_cc_srcs)
-   set(npu_cc_srcs)
-   set(mlu_cc_srcs)
-   set(cudnn_cu_cc_srcs)
-   set(miopen_cu_cc_srcs)
-   set(cudnn_cu_srcs)
-   set(miopen_cu_srcs)
-   set(CUDNN_FILE)
-   set(MIOPEN_FILE)
-   set(mkldnn_cc_srcs)
-   set(MKLDNN_FILE)
-   set(op_common_deps operator op_registry math_function layer
-                      common_infer_shape_functions)
-   if(WITH_ASCEND_CL)
-     set(op_common_deps ${op_common_deps} npu_op_runner)
-   endif()
-   if(WITH_MLU)
-     set(op_common_deps ${op_common_deps} mlu_baseop)
-   endif()
-
-   # Option `UNITY` is used to specify that operator `TARGET` will compile with Unity Build.
-   set(options UNITY)
-   set(oneValueArgs "")
-   set(multiValueArgs SRCS DEPS)
-   set(pybind_flag 0)
-   cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
-                         "${multiValueArgs}" ${ARGN})
-
-   list(LENGTH op_library_SRCS op_library_SRCS_len)
-   if(${op_library_SRCS_len} EQUAL 0)
-     # No explicit sources: probe the source directory for the conventional
-     # file names derived from TARGET.
-     if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
-       list(APPEND cc_srcs ${TARGET}.cc)
-     endif()
-     if(WITH_GPU)
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
-         list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
-       endif()
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
-         list(APPEND cu_srcs ${TARGET}.cu)
-       endif()
-       # rename in KP: .kps -> .cu
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
-         file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
-         file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps
-              ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
-         list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
-       endif()
-       if(WITH_NV_JETSON)
-         list(REMOVE_ITEM cu_srcs "decode_jpeg_op.cu")
-       endif()
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
-         set(PART_CUDA_KERNEL_FILES
-             ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
-             ${PART_CUDA_KERNEL_FILES}
-             PARENT_SCOPE)
-         list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
-       endif()
-       string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
-         list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
-       endif()
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
-         list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
-       endif()
-     endif()
-     if(WITH_ROCM)
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
-         list(APPEND hip_cc_srcs ${TARGET}.cu.cc)
-       endif()
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
-         list(APPEND hip_srcs ${TARGET}.cu)
-       endif()
-       # rename in KP: .kps -> .cu
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
-         file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
-         file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps
-              ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
-         list(APPEND hip_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
-       endif()
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
-         set(PART_CUDA_KERNEL_FILES
-             ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
-             ${PART_CUDA_KERNEL_FILES}
-             PARENT_SCOPE)
-         list(APPEND hip_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
-       endif()
-       # The MIOpen sources reuse the `_cudnn_op` file naming.
-       string(REPLACE "_op" "_cudnn_op" MIOPEN_FILE "${TARGET}")
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu.cc)
-         list(APPEND miopen_cu_cc_srcs ${MIOPEN_FILE}.cu.cc)
-       endif()
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu)
-         list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu)
-       endif()
-     endif()
-     if(WITH_MKLDNN)
-       string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}")
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc)
-         list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc)
-       endif()
-     endif()
-     if(WITH_XPU)
-       string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}")
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc)
-         list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
-       endif()
-     endif()
-     if(WITH_XPU_KP)
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.xpu)
-         list(APPEND xpu_kp_cc_srcs ${TARGET}.xpu)
-       endif()
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
-         list(APPEND xpu_kp_cc_srcs ${TARGET}.kps)
-       endif()
-     endif()
-     if(WITH_ASCEND_CL)
-       string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}")
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc)
-         list(APPEND npu_cc_srcs ${NPU_FILE}.cc)
-       endif()
-     endif()
-     if(WITH_MLU)
-       string(REPLACE "_op" "_op_mlu" MLU_FILE "${TARGET}")
-       if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MLU_FILE}.cc)
-         list(APPEND mlu_cc_srcs ${MLU_FILE}.cc)
-       endif()
-     endif()
-   else()
-     # Explicit sources: classify each file by suffix. The order of the tests
-     # matters, the more specific suffixes are checked first.
-     foreach(src ${op_library_SRCS})
-       if(WITH_ROCM AND "${src}" MATCHES ".*_cudnn_op.cu$")
-         list(APPEND miopen_cu_srcs ${src})
-       elseif(WITH_ROCM AND "${src}" MATCHES ".*\\.cu$")
-         list(APPEND hip_srcs ${src})
-       elseif(WITH_ROCM AND "${src}" MATCHES ".*_cudnn_op.cu.cc$")
-         list(APPEND miopen_cu_cc_srcs ${src})
-       elseif(WITH_ROCM AND "${src}" MATCHES ".*\\.cu.cc$")
-         list(APPEND hip_cc_srcs ${src})
-       elseif(WITH_GPU AND "${src}" MATCHES ".*_cudnn_op.cu$")
-         list(APPEND cudnn_cu_srcs ${src})
-       elseif(WITH_GPU AND "${src}" MATCHES ".*\\.cu$")
-         list(APPEND cu_srcs ${src})
-       elseif(WITH_GPU AND "${src}" MATCHES ".*_cudnn_op.cu.cc$")
-         list(APPEND cudnn_cu_cc_srcs ${src})
-       elseif(WITH_GPU AND "${src}" MATCHES ".*\\.cu.cc$")
-         list(APPEND cu_cc_srcs ${src})
-       elseif(WITH_MKLDNN AND "${src}" MATCHES ".*_mkldnn_op.cc$")
-         list(APPEND mkldnn_cc_srcs ${src})
-       elseif(WITH_XPU AND "${src}" MATCHES ".*_op_xpu.cc$")
-         list(APPEND xpu_cc_srcs ${src})
-       elseif(WITH_XPU_KP AND "${src}" MATCHES ".*\\.xpu$")
-         list(APPEND xpu_kp_cc_srcs ${src})
-       elseif(WITH_XPU_KP AND "${src}" MATCHES ".*\\.kps$")
-         list(APPEND xpu_kp_cc_srcs ${src})
-       elseif(WITH_ASCEND_CL AND "${src}" MATCHES ".*_op_npu.cc$")
-         list(APPEND npu_cc_srcs ${src})
-       elseif(WITH_MLU AND "${src}" MATCHES ".*_op_mlu.cc$")
-         list(APPEND mlu_cc_srcs ${src})
-       elseif("${src}" MATCHES ".*\\.cc$")
-         list(APPEND cc_srcs ${src})
-       else()
-         message(
-           FATAL_ERROR
-             "${TARGET} Source file ${src} should only be .cc or .cu or .xpu")
-       endif()
-     endforeach()
-   endif()
-
-   list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
-   list(LENGTH xpu_kp_cc_srcs xpu_kp_cc_srcs_len)
-   list(LENGTH cc_srcs cc_srcs_len)
-   if(${cc_srcs_len} EQUAL 0)
-     message(
-       FATAL_ERROR
-         "The op library ${TARGET} should contains at least one .cc file")
-   endif()
-   if(WIN32)
-     # Skip ops that are unsupported on Windows (there is no nccl there).
-     foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op")
-       if("${TARGET}" STREQUAL "${windows_unsupport_op}")
-         return()
-       endif()
-     endforeach()
-   endif()
-
-   # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
-   if(WITH_UNITY_BUILD AND op_library_UNITY)
-     # Generate the unity target name by the directory where source files located.
-     string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET
-                    ${CMAKE_CURRENT_SOURCE_DIR})
-     string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET})
-     set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity")
-     if(NOT ${UNITY_TARGET} IN_LIST OP_LIBRARY)
-       set(OP_LIBRARY
-           ${UNITY_TARGET} ${OP_LIBRARY}
-           CACHE INTERNAL "op libs")
-     endif()
-   else()
-     set(OP_LIBRARY
-         ${TARGET} ${OP_LIBRARY}
-         CACHE INTERNAL "op libs")
-   endif()
-
-   list(LENGTH op_library_DEPS op_library_DEPS_len)
-   if(${op_library_DEPS_len} GREATER 0)
-     set(DEPS_OPS
-         ${TARGET} ${DEPS_OPS}
-         PARENT_SCOPE)
-   endif()
-   if(WITH_GPU)
-     # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
-     if(WITH_UNITY_BUILD AND op_library_UNITY)
-       # Combine the cc and cu source files.
-       compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs}
-                                    ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs})
-       compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs}
-                                    ${cu_srcs})
-       if(TARGET ${UNITY_TARGET})
-         # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
-         target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources}
-                                                ${unity_target_cu_sources})
-       else()
-         # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
-         nv_library(
-           ${UNITY_TARGET}
-           SRCS ${unity_target_cc_sources} ${unity_target_cu_sources}
-           DEPS ${op_library_DEPS} ${op_common_deps})
-       endif()
-       # Add alias library to handle dependencies.
-       add_library(${TARGET} ALIAS ${UNITY_TARGET})
-     else()
-       nv_library(
-         ${TARGET}
-         SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs}
-              ${mkldnn_cc_srcs} ${cu_srcs}
-         DEPS ${op_library_DEPS} ${op_common_deps})
-     endif()
-   elseif(WITH_ROCM)
-     # Sources that are dropped from ROCm builds.
-     list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
-     list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
-     list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
-     list(REMOVE_ITEM hip_srcs "cholesky_solve_op.cu")
-     list(REMOVE_ITEM hip_srcs "lu_op.cu")
-     list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu")
-     list(REMOVE_ITEM hip_srcs "svd_op.cu")
-     list(REMOVE_ITEM hip_srcs "eigvalsh_op.cu")
-     list(REMOVE_ITEM hip_srcs "qr_op.cu")
-     list(REMOVE_ITEM hip_srcs "eigh_op.cu")
-     list(REMOVE_ITEM hip_srcs "lstsq_op.cu")
-     list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
-     list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
-     hip_library(
-       ${TARGET}
-       SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs}
-            ${mkldnn_cc_srcs} ${hip_srcs}
-       DEPS ${op_library_DEPS} ${op_common_deps})
-   elseif(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
-     xpu_library(
-       ${TARGET}
-       SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs}
-       DEPS ${op_library_DEPS} ${op_common_deps})
-   else()
-     # deal with CANN version control while registering NPU operators before build
-     if(WITH_ASCEND_CL)
-       if(CANN_VERSION LESS 504000)
-         list(REMOVE_ITEM npu_cc_srcs "multinomial_op_npu.cc")
-         list(REMOVE_ITEM npu_cc_srcs "take_along_axis_op_npu.cc")
-       endif()
-     endif()
-     # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
-     if(WITH_UNITY_BUILD AND op_library_UNITY)
-       # Combine the cc source files.
-       compose_unity_target_sources(
-         ${UNITY_TARGET}
-         cc
-         ${cc_srcs}
-         ${mkldnn_cc_srcs}
-         ${xpu_cc_srcs}
-         ${npu_cc_srcs}
-         ${mlu_cc_srcs})
-       if(TARGET ${UNITY_TARGET})
-         # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
-         target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
-       else()
-         # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
-         cc_library(
-           ${UNITY_TARGET}
-           SRCS ${unity_target_cc_sources}
-           DEPS ${op_library_DEPS} ${op_common_deps})
-       endif()
-       # Add alias library to handle dependencies.
-       add_library(${TARGET} ALIAS ${UNITY_TARGET})
-     else()
-       cc_library(
-         ${TARGET}
-         SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs}
-              ${mlu_cc_srcs}
-         DEPS ${op_library_DEPS} ${op_common_deps})
-     endif()
-   endif()
-
-   # Lengths used by the pybind section below. npu_cc_srcs is measured here,
-   # after the CANN version filtering above.
-   list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
-   list(LENGTH npu_cc_srcs npu_cc_srcs_len)
-   list(LENGTH mlu_cc_srcs mlu_cc_srcs_len)
-
-   # Define operators that don't need pybind here.
-   foreach(
-     manual_pybind_op
-     "compare_all_op"
-     "compare_op"
-     "logical_op"
-     "bitwise_op"
-     "nccl_op"
-     "tensor_array_read_write_op"
-     "tensorrt_engine_op"
-     "conv_fusion_op")
-
-     if("${TARGET}" STREQUAL "${manual_pybind_op}")
-       set(pybind_flag 1)
-     endif()
-   endforeach()
-
-   # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
-   # Note that it's enough to just add one operator to pybind in a *_op.cc file.
-   # And for detailed pybind information, please see generated paddle/pybind/pybind.h.
-   set(ORIGINAL_TARGET ${TARGET})
-   string(REGEX REPLACE "_op" "" TARGET "${TARGET}")
-
-   # NOTE: find_register leaves its output empty when nothing is registered, so
-   # every check below is a quoted string comparison against "".
-   foreach(cc_src ${cc_srcs})
-     # pybind USE_OP_ITSELF
-     set(op_name "")
-     find_register(${cc_src} "REGISTER_OPERATOR" op_name)
-     if(NOT "${op_name}" STREQUAL "")
-       file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
-       # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
-       set(TARGET ${op_name})
-       set(pybind_flag 1)
-     endif()
-
-     set(op_name "")
-     find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name)
-     if(NOT "${op_name}" STREQUAL "")
-       file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
-       # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
-       set(TARGET ${op_name})
-       set(pybind_flag 1)
-     endif()
-
-     # pybind USE_OP_DEVICE_KERNEL for CPU
-     set(op_name "")
-     find_register(${cc_src} "REGISTER_OP_CPU_KERNEL" op_name)
-     if(NOT "${op_name}" STREQUAL "")
-       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CPU);\n")
-       # why change TARGET here?
-       # when building paddle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
-       # in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add
-       # and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h
-       # however, grad_add has no mkldnn kernel.
-       set(TARGET ${op_name})
-       set(pybind_flag 1)
-     endif()
-   endforeach()
-
-   # pybind USE_OP_DEVICE_KERNEL for CUDA
-   list(APPEND cu_srcs ${cu_cc_srcs})
-   foreach(cu_src ${cu_srcs})
-     set(op_name "")
-     find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
-     if(NOT "${op_name}" STREQUAL "")
-       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
-       set(pybind_flag 1)
-     endif()
-   endforeach()
-
-   # pybind USE_OP_DEVICE_KERNEL for ROCm
-   list(APPEND hip_srcs ${hip_cc_srcs})
-   foreach(hip_src ${hip_srcs})
-     set(op_name "")
-     find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name)
-     if(NOT "${op_name}" STREQUAL "")
-       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
-       set(pybind_flag 1)
-     endif()
-   endforeach()
-
-   # pybind USE_OP_DEVICE_KERNEL for CUDNN/MIOPEN
-   list(APPEND cudnn_cu_srcs ${cudnn_cu_cc_srcs})
-   list(APPEND cudnn_cu_srcs ${miopen_cu_cc_srcs})
-   list(APPEND cudnn_cu_srcs ${miopen_cu_srcs})
-   list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
-   if(${cudnn_cu_srcs_len} GREATER 0 AND "${ORIGINAL_TARGET}" STREQUAL
-                                         "activation_op")
-     file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
-   else()
-     foreach(cudnn_src ${cudnn_cu_srcs})
-       set(op_name "")
-       find_register(${cudnn_src} "REGISTER_OP_KERNEL" op_name)
-       if(NOT "${op_name}" STREQUAL "")
-         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDNN);\n")
-         set(pybind_flag 1)
-       endif()
-     endforeach()
-   endif()
-
-   # pybind USE_OP_DEVICE_KERNEL for XPU
-   if(WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)
-     if("${ORIGINAL_TARGET}" STREQUAL "activation_op")
-       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, XPU);\n")
-     else()
-       foreach(xpu_src ${xpu_cc_srcs})
-         set(op_name "")
-         find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL" op_name)
-         if(NOT "${op_name}" STREQUAL "")
-           file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n")
-           set(pybind_flag 1)
-         else()
-           # Fall back to the functor flavour of the registration macro.
-           find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL_FUNCTOR" op_name)
-           if(NOT "${op_name}" STREQUAL "")
-             file(APPEND ${pybind_file}
-                  "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n")
-             set(pybind_flag 1)
-           endif()
-         endif()
-       endforeach()
-     endif()
-   endif()
-
-   # pybind USE_OP_DEVICE_KERNEL for XPU KP
-   if(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
-     foreach(xpu_kp_src ${xpu_kp_cc_srcs})
-       set(op_name "")
-       find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name)
-       if(NOT "${op_name}" STREQUAL "")
-         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n")
-         message(STATUS "Building KP Target: ${op_name}")
-         set(pybind_flag 1)
-       endif()
-     endforeach()
-   endif()
-
-   # pybind USE_OP_DEVICE_KERNEL for NPU
-   if(WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0)
-     foreach(npu_src ${npu_cc_srcs})
-       set(op_name "")
-       find_register(${npu_src} "REGISTER_OP_NPU_KERNEL" op_name)
-       if(NOT "${op_name}" STREQUAL "")
-         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, NPU);\n")
-         set(pybind_flag 1)
-       endif()
-     endforeach()
-   endif()
-
-   # pybind USE_OP_DEVICE_KERNEL for MLU
-   if(WITH_MLU AND ${mlu_cc_srcs_len} GREATER 0)
-     foreach(mlu_src ${mlu_cc_srcs})
-       set(op_name "")
-       find_register(${mlu_src} "REGISTER_OP_MLU_KERNEL" op_name)
-       if(NOT "${op_name}" STREQUAL "")
-         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MLU);\n")
-         set(pybind_flag 1)
-       endif()
-     endforeach()
-   endif()
-
-   # pybind USE_OP_DEVICE_KERNEL for MKLDNN
-   if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
-     # MKLDNN_FILE is only set when the sources were auto-discovered; with an
-     # explicit SRCS list it is empty, so it must be quoted to keep the
-     # comparisons well-formed (they then fall through to the generic scan).
-     # Append first implemented MKLDNN activation operator
-     if("${MKLDNN_FILE}" STREQUAL "activation_mkldnn_op")
-       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
-     elseif("${MKLDNN_FILE}" STREQUAL "conv_mkldnn_op")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n")
-     elseif("${MKLDNN_FILE}" STREQUAL "transpose_mkldnn_op")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, FP32);\n")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, S8);\n")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, U8);\n")
-     elseif("${MKLDNN_FILE}" STREQUAL "fc_mkldnn_op")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, FP32);\n")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, S8);\n")
-       file(APPEND ${pybind_file}
-            "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, U8);\n")
-     else()
-       foreach(mkldnn_src ${mkldnn_cc_srcs})
-         set(op_name "")
-         find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name)
-         if(NOT "${op_name}" STREQUAL "")
-           file(APPEND ${pybind_file}
-                "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n")
-           set(pybind_flag 1)
-         endif()
-       endforeach()
-     endif()
-   endif()
-
-   # pybind USE_NO_KERNEL_OP
-   # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel
-   # NOTE(review): TARGET_CONTENT is not set anywhere in this function, so
-   # regex_result is empty unless an enclosing scope defines it - confirm.
-   string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
-   string(REPLACE "_op" "" TARGET "${TARGET}")
-   if(${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
-     file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
-     set(pybind_flag 1)
-   endif()
-
-   # pybind USE_OP
-   if(${pybind_flag} EQUAL 0)
-     # NOTE(*): activation uses a macro to register the kernels, set use_op manually.
-     if("${TARGET}" STREQUAL "activation")
-       file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n")
-     elseif("${TARGET}" STREQUAL "fake_dequantize")
-       file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
-     elseif("${TARGET}" STREQUAL "fake_quantize")
-       file(APPEND ${pybind_file} "USE_OP(fake_quantize_abs_max);\n")
-     elseif("${TARGET}" STREQUAL "tensorrt_engine_op")
-       message(
-         STATUS
-           "Pybind skips [tensorrt_engine_op], for this OP is only used in inference"
-       )
-     else()
-       file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
-     endif()
-   endif()
- endfunction()
-
- # Register every operator found in the current source directory.
- #
- # Usage: register_operators([EXCLUDES <op>...] [DEPS <target>...])
- #
- # Globs `*_op.cc`, reduces each file name to its operator name (the
- # `_mkldnn`, `_xpu`, `_npu`, `_mlu` markers and the `.cc` extension are
- # removed) and calls `op_library(<op> UNITY ...)` for every name that is not
- # listed in EXCLUDES. DEPS, when given, is forwarded to each call.
- function(register_operators)
-   set(options "")
-   set(oneValueArgs "")
-   set(multiValueArgs EXCLUDES DEPS)
-   cmake_parse_arguments(register_operators "${options}" "${oneValueArgs}"
-                         "${multiValueArgs}" ${ARGN})
-
-   file(
-     GLOB OPS
-     RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
-     "*_op.cc")
-   # Device-specific variants collapse onto the same operator name, hence the
-   # de-duplication afterwards.
-   foreach(marker "_mkldnn" "_xpu" "_npu" "_mlu" ".cc")
-     string(REPLACE "${marker}" "" OPS "${OPS}")
-   endforeach()
-   list(REMOVE_DUPLICATES OPS)
-
-   # Build the optional `DEPS ...` tail once instead of branching per operator.
-   set(forwarded_deps)
-   list(LENGTH register_operators_DEPS deps_count)
-   if(${deps_count} GREATER 0)
-     set(forwarded_deps DEPS ${register_operators_DEPS})
-   endif()
-
-   foreach(op ${OPS})
-     if(NOT op IN_LIST register_operators_EXCLUDES)
-       op_library(${op} UNITY ${forwarded_deps})
-     endif()
-   endforeach()
-
-   # Complete the processing of `UNITY_TARGET`.
-   if(WITH_UNITY_BUILD)
-     finish_unity_target(cc)
-     if(WITH_GPU)
-       finish_unity_target(cu)
-     endif()
-   endif()
- endfunction()
|