diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cdc5487..3e9b4be4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,49 @@ option(GENERATE_OPERATOR_CALL_INSTANTIATIONS "Generate explicit operator call instantiations" ON) option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF) +set(INFINI_OPS_PLUGINS "" CACHE STRING + "Comma- or semicolon-separated infini_ops build-time plugins to enable") + +function(_infini_ops_enable_legacy_options_from_plugins) + if(NOT INFINI_OPS_PLUGINS) + return() + endif() + + set(_requested_plugins "${INFINI_OPS_PLUGINS}") + string(REPLACE "," ";" _requested_plugins "${_requested_plugins}") + foreach(_plugin IN LISTS _requested_plugins) + string(STRIP "${_plugin}" _plugin) + if(_plugin STREQUAL "") + continue() + elseif(_plugin STREQUAL "cpu") + set(WITH_CPU ON CACHE BOOL "Enable CPU backend" FORCE) + elseif(_plugin STREQUAL "nvidia") + set(WITH_NVIDIA ON CACHE BOOL "Enable CUDA backend" FORCE) + elseif(_plugin STREQUAL "iluvatar") + set(WITH_ILUVATAR ON CACHE BOOL "Enable Iluvatar GPU backend" FORCE) + elseif(_plugin STREQUAL "hygon") + set(WITH_HYGON ON CACHE BOOL "Enable Hygon GPU backend" FORCE) + elseif(_plugin STREQUAL "metax") + set(WITH_METAX ON CACHE BOOL "Enable MetaX backend" FORCE) + elseif(_plugin STREQUAL "cambricon") + set(WITH_CAMBRICON ON CACHE BOOL "Enable Cambricon backend" FORCE) + elseif(_plugin STREQUAL "moore") + set(WITH_MOORE ON CACHE BOOL "Enable Moore backend" FORCE) + elseif(_plugin STREQUAL "ascend") + set(WITH_ASCEND ON CACHE BOOL "Enable Ascend backend" FORCE) + elseif(_plugin STREQUAL "cuda-common") + # Shared dependency plugin; no legacy device option to set. + else() + message(FATAL_ERROR + "Unknown infini_ops plugin `${_plugin}`. 
v1 supports built-in plugins: " + "`cpu`, `nvidia`, `iluvatar`, `hygon`, `metax`, `moore`, `cambricon`, `ascend`, `cuda-common`") + endif() + endforeach() +endfunction() + +_infini_ops_enable_legacy_options_from_plugins() + + set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") function(_infiniops_find_hygon_cuda_root out_var dtk_root) diff --git a/cmake/infini_ops_plugins.cmake b/cmake/infini_ops_plugins.cmake new file mode 100644 index 00000000..19af9356 --- /dev/null +++ b/cmake/infini_ops_plugins.cmake @@ -0,0 +1,253 @@ +include_guard(GLOBAL) + +set(INFINI_OPS_PLUGINS "" CACHE STRING + "Comma- or semicolon-separated infini_ops build-time plugins to enable") +set(INFINI_OPS_PLUGIN_ROOT "${PROJECT_SOURCE_DIR}/plugins" CACHE PATH + "Directory containing infini_ops build-time plugins") +set(INFINI_OPS_PLUGIN_CONTRACT_VERSION 1) + +set(_INFINI_OPS_KNOWN_DEVICE_PLUGINS + cpu nvidia iluvatar hygon metax moore cambricon ascend) +# Append each non-empty value in ARGN to the global list property `property_name`, skipping values that are already recorded. +function(_infini_ops_append_unique_global property_name) + get_property(_values GLOBAL PROPERTY "${property_name}") + foreach(_value ${ARGN}) + if("${_value}" STREQUAL "") + continue() + endif() + list(FIND _values "${_value}" _index) + if(_index EQUAL -1) + set_property(GLOBAL APPEND PROPERTY "${property_name}" "${_value}") + list(APPEND _values "${_value}") # keep the local snapshot in sync so a value repeated within ARGN is appended only once + endif() + endforeach() +endfunction() + +function(infini_ops_register_plugin) + set(_one_value_args NAME KIND CONTRACT_VERSION CMAKE_ENTRY) + set(_multi_value_args + DEVICES + DEPENDS + SOURCE_ROOTS + OPERATOR_ROOTS + DEVICE_HEADERS + TEST_DEVICES) + cmake_parse_arguments(ARG "" "${_one_value_args}" "${_multi_value_args}" ${ARGN}) + + foreach(_required NAME KIND CONTRACT_VERSION CMAKE_ENTRY) + if(NOT ARG_${_required}) + message(FATAL_ERROR "`infini_ops_register_plugin` missing `${_required}`") + endif() + endforeach() + + if(NOT ARG_KIND STREQUAL "shared" AND NOT ARG_KIND STREQUAL "device") + message(FATAL_ERROR "infini_ops plugin `${ARG_NAME}` has invalid `kind`: `${ARG_KIND}`") + endif() + + if(NOT "${ARG_CONTRACT_VERSION}"
STREQUAL "${INFINI_OPS_PLUGIN_CONTRACT_VERSION}") + message(FATAL_ERROR + "infini_ops plugin `${ARG_NAME}` uses contract `${ARG_CONTRACT_VERSION}`; " + "expected `${INFINI_OPS_PLUGIN_CONTRACT_VERSION}`") + endif() + + foreach(_device IN LISTS ARG_DEVICES) + list(FIND _INFINI_OPS_KNOWN_DEVICE_PLUGINS "${_device}" _known_index) + if(_known_index EQUAL -1) + message(FATAL_ERROR + "infini_ops plugin `${ARG_NAME}` declares unknown device `${_device}`") + endif() + endforeach() + + if(ARG_KIND STREQUAL "device" AND NOT ARG_DEVICES) + message(FATAL_ERROR "infini_ops device plugin `${ARG_NAME}` must declare `DEVICES`") + endif() + + if(ARG_KIND STREQUAL "shared" AND ARG_DEVICES) + message(FATAL_ERROR "infini_ops shared plugin `${ARG_NAME}` must not declare `DEVICES`") + endif() + + _infini_ops_append_unique_global(INFINI_OPS_PLUGIN_NAMES "${ARG_NAME}") + _infini_ops_append_unique_global(INFINI_OPS_PLUGIN_DEVICES ${ARG_DEVICES}) + _infini_ops_append_unique_global(INFINI_OPS_PLUGIN_SOURCE_ROOTS ${ARG_SOURCE_ROOTS}) + _infini_ops_append_unique_global(INFINI_OPS_PLUGIN_OPERATOR_ROOTS ${ARG_OPERATOR_ROOTS}) + _infini_ops_append_unique_global(INFINI_OPS_PLUGIN_DEVICE_HEADERS ${ARG_DEVICE_HEADERS}) + _infini_ops_append_unique_global(INFINI_OPS_PLUGIN_TEST_DEVICES ${ARG_TEST_DEVICES}) +endfunction() + +function(infini_ops_register_device) + infini_ops_register_plugin( + KIND device + CONTRACT_VERSION ${INFINI_OPS_PLUGIN_CONTRACT_VERSION} + ${ARGN}) +endfunction() + +function(infini_ops_enable_plugin name) + get_property(_loaded GLOBAL PROPERTY INFINI_OPS_PLUGIN_LOADED) + list(FIND _loaded "${name}" _loaded_index) + if(NOT _loaded_index EQUAL -1) + return() + endif() + + get_property(_loading GLOBAL PROPERTY INFINI_OPS_PLUGIN_LOADING_STACK) + list(FIND _loading "${name}" _loading_index) + if(NOT _loading_index EQUAL -1) + list(APPEND _loading "${name}") + string(REPLACE ";" " -> " _cycle "${_loading}") + message(FATAL_ERROR "infini_ops plugin dependency cycle detected: 
`${_cycle}`") + endif() + + set(_entry_path "${INFINI_OPS_PLUGIN_ROOT}/${name}/plugin.cmake") + if(NOT EXISTS "${_entry_path}") + message(FATAL_ERROR "infini_ops plugin `${name}` `CMake` entry not found: `${_entry_path}`") + endif() + + set_property(GLOBAL APPEND PROPERTY INFINI_OPS_PLUGIN_LOADING_STACK "${name}") + include("${_entry_path}") + get_property(_loading GLOBAL PROPERTY INFINI_OPS_PLUGIN_LOADING_STACK) + list(REMOVE_ITEM _loading "${name}") + set_property(GLOBAL PROPERTY INFINI_OPS_PLUGIN_LOADING_STACK "${_loading}") + + get_property(_registered GLOBAL PROPERTY INFINI_OPS_PLUGIN_NAMES) + list(FIND _registered "${name}" _registered_index) + if(_registered_index EQUAL -1) + message(FATAL_ERROR "infini_ops plugin `${name}` did not call `infini_ops_register_plugin`") + endif() + + set_property(GLOBAL APPEND PROPERTY INFINI_OPS_PLUGIN_LOADED "${name}") +endfunction() + +function(infini_ops_enable_requested_plugins) + set(_requested) + + if(INFINI_OPS_PLUGINS) + set(_raw_plugins "${INFINI_OPS_PLUGINS}") + string(REPLACE "," ";" _raw_plugins "${_raw_plugins}") + foreach(_plugin IN LISTS _raw_plugins) + string(STRIP "${_plugin}" _plugin) + if(NOT _plugin STREQUAL "") + list(APPEND _requested "${_plugin}") + endif() + endforeach() + endif() + + if(WITH_CPU) + list(APPEND _requested cpu) + endif() + if(WITH_NVIDIA) + list(APPEND _requested nvidia) + endif() + if(WITH_ILUVATAR) + list(APPEND _requested iluvatar) + endif() + if(WITH_HYGON) + list(APPEND _requested hygon) + endif() + if(WITH_METAX) + list(APPEND _requested metax) + endif() + if(WITH_MOORE) + list(APPEND _requested moore) + endif() + if(WITH_CAMBRICON) + list(APPEND _requested cambricon) + endif() + if(WITH_ASCEND) + list(APPEND _requested ascend) + endif() + + if(_requested) + list(REMOVE_DUPLICATES _requested) + else() + list(APPEND _requested cpu) + set(WITH_CPU ON CACHE BOOL "Enable CPU backend" FORCE) + endif() + + foreach(_plugin IN LISTS _requested) + infini_ops_enable_plugin("${_plugin}") + 
endforeach() +endfunction() + +function(infini_ops_get_enabled_devices out_var) + get_property(_devices GLOBAL PROPERTY INFINI_OPS_PLUGIN_DEVICES) + if(NOT _devices) + set(_devices) + endif() + set(${out_var} ${_devices} PARENT_SCOPE) +endfunction() + +function(_infini_ops_json_escape value out_var) + string(REPLACE "\\" "\\\\" _escaped "${value}") + string(REPLACE "\"" "\\\"" _escaped "${_escaped}") + set(${out_var} "${_escaped}" PARENT_SCOPE) +endfunction() + +function(_infini_ops_append_json_array path field trailing_comma) + file(APPEND "${path}" " \"${field}\": [") + set(_first TRUE) + foreach(_value ${ARGN}) + if(_first) + set(_first FALSE) + else() + file(APPEND "${path}" ", ") + endif() + _infini_ops_json_escape("${_value}" _escaped) + file(APPEND "${path}" "\"${_escaped}\"") + endforeach() + file(APPEND "${path}" "]") + if(trailing_comma) + file(APPEND "${path}" ",") + endif() + file(APPEND "${path}" "\n") +endfunction() + +function(_infini_ops_append_json_map path field trailing_comma) + file(APPEND "${path}" " \"${field}\": {") + set(_first TRUE) + foreach(_entry ${ARGN}) + string(FIND "${_entry}" "=" _equals) + if(_equals EQUAL -1) + message(FATAL_ERROR "Invalid infini_ops plugin map entry `${_entry}`") + endif() + string(SUBSTRING "${_entry}" 0 ${_equals} _key) + math(EXPR _value_start "${_equals} + 1") + string(SUBSTRING "${_entry}" ${_value_start} -1 _value) + + if(_first) + set(_first FALSE) + else() + file(APPEND "${path}" ",") + endif() + _infini_ops_json_escape("${_key}" _escaped_key) + _infini_ops_json_escape("${_value}" _escaped_value) + file(APPEND "${path}" "\n \"${_escaped_key}\": \"${_escaped_value}\"") + endforeach() + if(NOT _first) + file(APPEND "${path}" "\n ") + endif() + file(APPEND "${path}" "}") + if(trailing_comma) + file(APPEND "${path}" ",") + endif() + file(APPEND "${path}" "\n") +endfunction() + +function(infini_ops_write_plugin_registry path) + get_property(_plugins GLOBAL PROPERTY INFINI_OPS_PLUGIN_NAMES) + 
get_property(_devices GLOBAL PROPERTY INFINI_OPS_PLUGIN_DEVICES) + get_property(_source_roots GLOBAL PROPERTY INFINI_OPS_PLUGIN_SOURCE_ROOTS) + get_property(_operator_roots GLOBAL PROPERTY INFINI_OPS_PLUGIN_OPERATOR_ROOTS) + get_property(_device_headers GLOBAL PROPERTY INFINI_OPS_PLUGIN_DEVICE_HEADERS) + get_property(_test_devices GLOBAL PROPERTY INFINI_OPS_PLUGIN_TEST_DEVICES) + + file(WRITE "${path}" "{\n") + _infini_ops_append_json_array("${path}" "plugins" TRUE ${_plugins}) + _infini_ops_append_json_array("${path}" "devices" TRUE ${_devices}) + _infini_ops_append_json_array("${path}" "source_roots" TRUE ${_source_roots}) + _infini_ops_append_json_array("${path}" "operator_roots" TRUE ${_operator_roots}) + _infini_ops_append_json_map("${path}" "device_headers" TRUE ${_device_headers}) + _infini_ops_append_json_map("${path}" "test_devices" FALSE ${_test_devices}) + file(APPEND "${path}" "}\n") + + message(STATUS "infini_ops plugins: `${_plugins}`") + message(STATUS "infini_ops plugin devices: `${_devices}`") +endfunction() diff --git a/plugins/ascend/plugin.cmake b/plugins/ascend/plugin.cmake new file mode 100644 index 00000000..83049e8a --- /dev/null +++ b/plugins/ascend/plugin.cmake @@ -0,0 +1,84 @@ +infini_ops_register_device( + NAME ascend + CMAKE_ENTRY plugin.cmake + DEVICES ascend + SOURCE_ROOTS src/native/ascend + OPERATOR_ROOTS src/native/ascend/ops + DEVICE_HEADERS ascend=native/ascend/device_.h + TEST_DEVICES ascend=npu) + +# ASCEND_HOME is set by the top-level CMakeLists.txt. +file(GLOB_RECURSE ASCEND_SOURCES CONFIGURE_DEPENDS + "${INFINI_OPS_SRC_DIR}/native/ascend/*.cc" + "${INFINI_OPS_SRC_DIR}/native/ascend/*.cpp") +# Exclude `kernel_impl.cpp`: `AscendC` device code, not compiled by the host C++ compiler. +list(FILTER ASCEND_SOURCES EXCLUDE REGEX ".*kernel_impl\\.cpp$") +# Exclude custom/: standalone PyTorch extension, built separately. 
+list(FILTER ASCEND_SOURCES EXCLUDE REGEX ".*/custom/.*") + +target_compile_definitions(infiniops PUBLIC WITH_ASCEND=1) +target_sources(infiniops PRIVATE ${ASCEND_SOURCES}) + +# Resolve the driver lib dir two levels above the toolkit root. +get_filename_component(ASCEND_ROOT "${ASCEND_HOME}/../.." ABSOLUTE) + +# Prefer the real driver HAL; fall back to the toolkit stub for build-only +# environments (e.g., Docker CI images without hardware drivers installed). +# CANN <= 8.0: stub at runtime/lib64/stub/; CANN >= 8.5: devlib at ${CMAKE_SYSTEM_PROCESSOR}-linux/devlib/. +set(ASCEND_HAL_REAL "${ASCEND_ROOT}/driver/lib64/driver/libascend_hal.so") +set(ASCEND_HAL_STUB "${ASCEND_HOME}/runtime/lib64/stub/libascend_hal.so") +set(ASCEND_HAL_DEVLIB "${ASCEND_HOME}/${CMAKE_SYSTEM_PROCESSOR}-linux/devlib/libascend_hal.so") +if(EXISTS "${ASCEND_HAL_REAL}") + set(ASCEND_HAL_LIB "${ASCEND_HAL_REAL}") +elseif(EXISTS "${ASCEND_HAL_STUB}") + set(ASCEND_HAL_LIB "${ASCEND_HAL_STUB}") + message(STATUS "ascend_hal: driver not found, using stub for linking") +elseif(EXISTS "${ASCEND_HAL_DEVLIB}") + set(ASCEND_HAL_LIB "${ASCEND_HAL_DEVLIB}") + message(STATUS "ascend_hal: driver not found, using devlib for linking") +else() + message(FATAL_ERROR "libascend_hal.so not found (tried ${ASCEND_HAL_REAL}, ${ASCEND_HAL_STUB}, and ${ASCEND_HAL_DEVLIB})") +endif() + +target_include_directories(infiniops PUBLIC + "${ASCEND_HOME}/include" + "${ASCEND_HOME}/include/aclnn" + "${ASCEND_HOME}/include/aclnnop") +target_link_libraries(infiniops PUBLIC + "${ASCEND_HOME}/lib64/libascendcl.so" + "${ASCEND_HOME}/lib64/libnnopbase.so" + "${ASCEND_HOME}/lib64/libopapi.so" + "${ASCEND_HAL_LIB}") + +# ATB (Ascend Transformer Boost) provides fused operators like +# `PagedAttention` and `ReshapeAndCache` that are graph-capture safe. +set(ATB_HOME_DIR "$ENV{ATB_HOME_PATH}") +if(NOT ATB_HOME_DIR) + # Default search path under CANN nnal directory.
+ file(GLOB ATB_SEARCH_DIRS "/usr/local/Ascend/nnal/atb/*/atb/cxx_abi_1") + if(ATB_SEARCH_DIRS) + list(SORT ATB_SEARCH_DIRS ORDER DESCENDING) + list(GET ATB_SEARCH_DIRS 0 ATB_HOME_DIR) + endif() +endif() + +if(ATB_HOME_DIR AND EXISTS "${ATB_HOME_DIR}/include/atb/operation.h") + message(STATUS "ATB found: ${ATB_HOME_DIR}") + target_compile_definitions(infiniops PUBLIC INFINI_HAS_ATB=1) + target_include_directories(infiniops PUBLIC "${ATB_HOME_DIR}/include") + target_link_libraries(infiniops PUBLIC "${ATB_HOME_DIR}/lib/libatb.so") +else() + message(STATUS "ATB not found - ATB-based operators disabled") +endif() + +# Custom `AscendC` kernels (PyTorch extension, requires `torch_npu`). +if(BUILD_CUSTOM_KERNEL) + add_subdirectory( + "${INFINI_OPS_SRC_DIR}/native/ascend/custom" + "${CMAKE_CURRENT_BINARY_DIR}/native/ascend/custom") + + # Link the compiled `AscendC` kernel objects into `infiniops` so that + # custom kernel implementations (e.g. `RmsNorm` index 1) can call + # them via the generated launch functions. 
+ target_compile_definitions(infiniops PUBLIC INFINI_HAS_CUSTOM_KERNELS=1) +endif() diff --git a/plugins/ascend/plugin.json b/plugins/ascend/plugin.json new file mode 100644 index 00000000..ea5317ee --- /dev/null +++ b/plugins/ascend/plugin.json @@ -0,0 +1,22 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [], + "device_headers": { + "ascend": "native/ascend/device_.h" + }, + "devices": [ + "ascend" + ], + "kind": "device", + "name": "ascend", + "operator_roots": [ + "src/native/ascend/ops" + ], + "source_roots": [ + "src/native/ascend" + ], + "test_devices": { + "ascend": "npu" + } +} diff --git a/plugins/cambricon/plugin.cmake b/plugins/cambricon/plugin.cmake new file mode 100644 index 00000000..6f781afe --- /dev/null +++ b/plugins/cambricon/plugin.cmake @@ -0,0 +1,53 @@ +infini_ops_register_device( + NAME cambricon + CMAKE_ENTRY plugin.cmake + DEVICES cambricon + SOURCE_ROOTS src/native/cambricon + OPERATOR_ROOTS src/native/cambricon/ops + DEVICE_HEADERS cambricon=native/cambricon/device_.h + TEST_DEVICES cambricon=mlu) + +file(GLOB_RECURSE CAMBRICON_MLU_SOURCES CONFIGURE_DEPENDS + "${INFINI_OPS_SRC_DIR}/native/cambricon/ops/*/*.mlu") +find_program(CNCC_COMPILER cncc HINTS "${NEUWARE_HOME}/bin" "$ENV{NEUWARE_HOME}/bin" /usr/local/neuware/bin) +if(CNCC_COMPILER) + message(STATUS "Found cncc: ${CNCC_COMPILER}") + set(MLU_COMPILE_OPTS + -c --bang-mlu-arch=mtp_592 -O3 -fPIC -Wall -Werror -std=c++17 -pthread + -I${INFINI_OPS_SRC_DIR} -I${NEUWARE_HOME}/include + -idirafter /usr/local/neuware/lib/clang/11.1.0/include) + function(compile_mlu_file src_file) + get_filename_component(name ${src_file} NAME_WE) + get_filename_component(path ${src_file} DIRECTORY) + file(RELATIVE_PATH rel_path "${INFINI_OPS_SRC_DIR}" "${path}") + set(out_file "${CMAKE_CURRENT_BINARY_DIR}/${rel_path}/${name}.o") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${rel_path}") + add_custom_command(OUTPUT ${out_file} + COMMAND ${CNCC_COMPILER} ${MLU_COMPILE_OPTS} -c 
${src_file} -o ${out_file} + DEPENDS ${src_file} + COMMENT "Building MLU kernel: ${src_file}") + set_property(DIRECTORY APPEND PROPERTY CAMBRICON_OBJECTS ${out_file}) + endfunction() + foreach(src ${CAMBRICON_MLU_SOURCES}) + compile_mlu_file(${src}) + endforeach() + get_directory_property(CAMBRICON_OBJECT_FILES CAMBRICON_OBJECTS) + if(CAMBRICON_OBJECT_FILES) + target_sources(infiniops PRIVATE ${CAMBRICON_OBJECT_FILES}) + endif() +else() + message(WARNING "cncc compiler not found. MLU kernels will not be compiled.") +endif() + +target_compile_definitions(infiniops PRIVATE WITH_CAMBRICON=1) +target_include_directories(infiniops PUBLIC "${NEUWARE_HOME}/include") +target_link_libraries(infiniops PUBLIC + ${CAMBRICON_RUNTIME_LIB} + ${CAMBRICON_CNNL_LIB} + ${CAMBRICON_CNNL_EXTRA_LIB} + ${CAMBRICON_PAPI_LIB}) +# Append the Neuware clang headers after the default include path for C++ compiles. NOTE(review): the language condition was reconstructed as `COMPILE_LANGUAGE:CXX`; confirm. +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + target_compile_options(infiniops PUBLIC + "$<$<COMPILE_LANGUAGE:CXX>:SHELL:-idirafter /usr/local/neuware/lib/clang/11.1.0/include>") +endif() diff --git a/plugins/cambricon/plugin.json b/plugins/cambricon/plugin.json new file mode 100644 index 00000000..96dda1cd --- /dev/null +++ b/plugins/cambricon/plugin.json @@ -0,0 +1,22 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [], + "device_headers": { + "cambricon": "native/cambricon/device_.h" + }, + "devices": [ + "cambricon" + ], + "kind": "device", + "name": "cambricon", + "operator_roots": [ + "src/native/cambricon/ops" + ], + "source_roots": [ + "src/native/cambricon" + ], + "test_devices": { + "cambricon": "mlu" + } +} diff --git a/plugins/cpu/plugin.cmake b/plugins/cpu/plugin.cmake new file mode 100644 index 00000000..c67cabf7 --- /dev/null +++ b/plugins/cpu/plugin.cmake @@ -0,0 +1,20 @@ +infini_ops_register_device( + NAME cpu + CMAKE_ENTRY plugin.cmake + DEVICES cpu + SOURCE_ROOTS src/native/cpu + OPERATOR_ROOTS src/native/cpu/ops + DEVICE_HEADERS cpu=native/cpu/device_.h + TEST_DEVICES cpu=cpu) + +file(GLOB_RECURSE CPU_SOURCES CONFIGURE_DEPENDS +
"${INFINI_OPS_SRC_DIR}/native/cpu/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cpu/*.cpp") +if(CPU_SOURCES) + target_sources(infiniops PRIVATE ${CPU_SOURCES}) +endif() + +target_compile_definitions(infiniops PUBLIC WITH_CPU=1) + +find_package(OpenMP REQUIRED COMPONENTS CXX) +target_link_libraries(infiniops PRIVATE OpenMP::OpenMP_CXX) diff --git a/plugins/cpu/plugin.json b/plugins/cpu/plugin.json new file mode 100644 index 00000000..e7cde1a2 --- /dev/null +++ b/plugins/cpu/plugin.json @@ -0,0 +1,22 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [], + "device_headers": { + "cpu": "native/cpu/device_.h" + }, + "devices": [ + "cpu" + ], + "kind": "device", + "name": "cpu", + "operator_roots": [ + "src/native/cpu/ops" + ], + "source_roots": [ + "src/native/cpu" + ], + "test_devices": { + "cpu": "cpu" + } +} diff --git a/plugins/cuda-common/plugin.cmake b/plugins/cuda-common/plugin.cmake new file mode 100644 index 00000000..33ba6473 --- /dev/null +++ b/plugins/cuda-common/plugin.cmake @@ -0,0 +1,7 @@ +infini_ops_register_plugin( + NAME cuda-common + KIND shared + CONTRACT_VERSION 1 + CMAKE_ENTRY plugin.cmake + SOURCE_ROOTS src/native/cuda + OPERATOR_ROOTS src/native/cuda/ops) diff --git a/plugins/cuda-common/plugin.json b/plugins/cuda-common/plugin.json new file mode 100644 index 00000000..079cd9b7 --- /dev/null +++ b/plugins/cuda-common/plugin.json @@ -0,0 +1,16 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [], + "device_headers": {}, + "devices": [], + "kind": "shared", + "name": "cuda-common", + "operator_roots": [ + "src/native/cuda/ops" + ], + "source_roots": [ + "src/native/cuda" + ], + "test_devices": {} +} diff --git a/plugins/hygon/plugin.cmake b/plugins/hygon/plugin.cmake new file mode 100644 index 00000000..fa432db4 --- /dev/null +++ b/plugins/hygon/plugin.cmake @@ -0,0 +1,28 @@ +infini_ops_enable_plugin(cuda-common) +infini_ops_register_device( + NAME hygon + CMAKE_ENTRY plugin.cmake + DEVICES hygon + 
DEPENDS cuda-common + SOURCE_ROOTS src/native/cuda/hygon + OPERATOR_ROOTS src/native/cuda/hygon/ops + DEVICE_HEADERS hygon=native/cuda/hygon/device_.h + TEST_DEVICES hygon=cuda) + +file(GLOB_RECURSE HYGON_SOURCES CONFIGURE_DEPENDS + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cu" + "${INFINI_OPS_SRC_DIR}/native/cuda/hygon/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/hygon/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/hygon/*.cu") + +enable_language(CUDA) +target_compile_definitions(infiniops PUBLIC WITH_HYGON=1) +target_sources(infiniops PRIVATE ${HYGON_SOURCES}) + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas) +set_target_properties(infiniops PROPERTIES + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) diff --git a/plugins/hygon/plugin.json b/plugins/hygon/plugin.json new file mode 100644 index 00000000..017d31b2 --- /dev/null +++ b/plugins/hygon/plugin.json @@ -0,0 +1,24 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [ + "cuda-common" + ], + "device_headers": { + "hygon": "native/cuda/hygon/device_.h" + }, + "devices": [ + "hygon" + ], + "kind": "device", + "name": "hygon", + "operator_roots": [ + "src/native/cuda/hygon/ops" + ], + "source_roots": [ + "src/native/cuda/hygon" + ], + "test_devices": { + "hygon": "cuda" + } +} diff --git a/plugins/iluvatar/plugin.cmake b/plugins/iluvatar/plugin.cmake new file mode 100644 index 00000000..20914a6b --- /dev/null +++ b/plugins/iluvatar/plugin.cmake @@ -0,0 +1,24 @@ +infini_ops_enable_plugin(cuda-common) +infini_ops_register_device( + NAME iluvatar + CMAKE_ENTRY plugin.cmake + DEVICES iluvatar + DEPENDS cuda-common + SOURCE_ROOTS src/native/cuda/iluvatar + OPERATOR_ROOTS src/native/cuda/iluvatar/ops + DEVICE_HEADERS iluvatar=native/cuda/iluvatar/device_.h + TEST_DEVICES iluvatar=cuda) + +file(GLOB_RECURSE ILUVATAR_SOURCES CONFIGURE_DEPENDS + 
"${INFINI_OPS_SRC_DIR}/native/cuda/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cu" + "${INFINI_OPS_SRC_DIR}/native/cuda/iluvatar/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/iluvatar/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/iluvatar/*.cu") + +target_compile_definitions(infiniops PUBLIC WITH_ILUVATAR=1) +target_sources(infiniops PRIVATE ${ILUVATAR_SOURCES}) + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas CUDA::cuda_driver) diff --git a/plugins/iluvatar/plugin.json b/plugins/iluvatar/plugin.json new file mode 100644 index 00000000..feaba7d4 --- /dev/null +++ b/plugins/iluvatar/plugin.json @@ -0,0 +1,24 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [ + "cuda-common" + ], + "device_headers": { + "iluvatar": "native/cuda/iluvatar/device_.h" + }, + "devices": [ + "iluvatar" + ], + "kind": "device", + "name": "iluvatar", + "operator_roots": [ + "src/native/cuda/iluvatar/ops" + ], + "source_roots": [ + "src/native/cuda/iluvatar" + ], + "test_devices": { + "iluvatar": "cuda" + } +} diff --git a/plugins/metax/plugin.cmake b/plugins/metax/plugin.cmake new file mode 100644 index 00000000..1d7ce7ed --- /dev/null +++ b/plugins/metax/plugin.cmake @@ -0,0 +1,28 @@ +infini_ops_enable_plugin(cuda-common) +infini_ops_register_device( + NAME metax + CMAKE_ENTRY plugin.cmake + DEVICES metax + DEPENDS cuda-common + SOURCE_ROOTS src/native/cuda/metax + OPERATOR_ROOTS src/native/cuda/metax/ops + DEVICE_HEADERS metax=native/cuda/metax/device_.h + TEST_DEVICES metax=cuda) + +file(GLOB_RECURSE METAX_SOURCES CONFIGURE_DEPENDS + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/metax/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/metax/*.maca") + +set_source_files_properties(${METAX_SOURCES} PROPERTIES LANGUAGE CXX) + +target_compile_definitions(infiniops PRIVATE WITH_METAX=1) 
+target_compile_options(infiniops PRIVATE "-x" "maca") +target_sources(infiniops PRIVATE ${METAX_SOURCES}) + +target_include_directories(infiniops PUBLIC "${MACA_PATH}/include") +target_link_libraries(infiniops PUBLIC + ${MACA_RUNTIME_LIB} + ${MACA_DNN_LIB} + ${MACA_BLAS_LIB}) diff --git a/plugins/metax/plugin.json b/plugins/metax/plugin.json new file mode 100644 index 00000000..23f2dde5 --- /dev/null +++ b/plugins/metax/plugin.json @@ -0,0 +1,24 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [ + "cuda-common" + ], + "device_headers": { + "metax": "native/cuda/metax/device_.h" + }, + "devices": [ + "metax" + ], + "kind": "device", + "name": "metax", + "operator_roots": [ + "src/native/cuda/metax/ops" + ], + "source_roots": [ + "src/native/cuda/metax" + ], + "test_devices": { + "metax": "cuda" + } +} diff --git a/plugins/moore/plugin.cmake b/plugins/moore/plugin.cmake new file mode 100644 index 00000000..b79be5df --- /dev/null +++ b/plugins/moore/plugin.cmake @@ -0,0 +1,26 @@ +infini_ops_enable_plugin(cuda-common) +infini_ops_register_device( + NAME moore + CMAKE_ENTRY plugin.cmake + DEVICES moore + DEPENDS cuda-common + SOURCE_ROOTS src/native/cuda/moore + OPERATOR_ROOTS src/native/cuda/moore/ops + DEVICE_HEADERS moore=native/cuda/moore/device_.h + TEST_DEVICES moore=musa) + +file(GLOB_RECURSE MOORE_SOURCES CONFIGURE_DEPENDS + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/moore/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/moore/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/moore/*.mu") + +set_source_files_properties(${MOORE_SOURCES} PROPERTIES LANGUAGE CXX) + +target_compile_definitions(infiniops PRIVATE WITH_MOORE=1) +target_compile_options(infiniops PRIVATE "-x" "musa") +target_sources(infiniops PRIVATE ${MOORE_SOURCES}) + +target_include_directories(infiniops PUBLIC "${MUSA_ROOT}/include") +target_link_libraries(infiniops PUBLIC ${MUSA_LIB} ${MUSART_LIB} 
${MUBLAS_LIB}) diff --git a/plugins/moore/plugin.json b/plugins/moore/plugin.json new file mode 100644 index 00000000..3586991b --- /dev/null +++ b/plugins/moore/plugin.json @@ -0,0 +1,24 @@ +{ + "cmake_entry": "plugin.cmake", + "contract_version": 1, + "depends": [ + "cuda-common" + ], + "device_headers": { + "moore": "native/cuda/moore/device_.h" + }, + "devices": [ + "moore" + ], + "kind": "device", + "name": "moore", + "operator_roots": [ + "src/native/cuda/moore/ops" + ], + "source_roots": [ + "src/native/cuda/moore" + ], + "test_devices": { + "moore": "musa" + } +} diff --git a/plugins/nvidia/plugin.cmake b/plugins/nvidia/plugin.cmake new file mode 100644 index 00000000..e7128ab3 --- /dev/null +++ b/plugins/nvidia/plugin.cmake @@ -0,0 +1,28 @@ +infini_ops_enable_plugin(cuda-common) +infini_ops_register_device( + NAME nvidia + CMAKE_ENTRY plugin.cmake + DEVICES nvidia + DEPENDS cuda-common + SOURCE_ROOTS src/native/cuda/nvidia + OPERATOR_ROOTS src/native/cuda/nvidia/ops + DEVICE_HEADERS nvidia=native/cuda/nvidia/device_.h + TEST_DEVICES nvidia=cuda) + +file(GLOB_RECURSE NVIDIA_SOURCES CONFIGURE_DEPENDS + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/*.cu" + "${INFINI_OPS_SRC_DIR}/native/cuda/nvidia/*.cc" + "${INFINI_OPS_SRC_DIR}/native/cuda/nvidia/*.cpp" + "${INFINI_OPS_SRC_DIR}/native/cuda/nvidia/*.cu") + +enable_language(CUDA) +target_compile_definitions(infiniops PUBLIC WITH_NVIDIA=1) +target_sources(infiniops PRIVATE ${NVIDIA_SOURCES}) + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas CUDA::cublasLt CUDA::cuda_driver) +set_target_properties(infiniops PROPERTIES + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) diff --git a/plugins/nvidia/plugin.json b/plugins/nvidia/plugin.json new file mode 100644 index 00000000..28645bcb --- /dev/null +++ b/plugins/nvidia/plugin.json @@ -0,0 +1,24 @@ +{ + "cmake_entry": "plugin.cmake", + 
"contract_version": 1, + "depends": [ + "cuda-common" + ], + "device_headers": { + "nvidia": "native/cuda/nvidia/device_.h" + }, + "devices": [ + "nvidia" + ], + "kind": "device", + "name": "nvidia", + "operator_roots": [ + "src/native/cuda/nvidia/ops" + ], + "source_roots": [ + "src/native/cuda/nvidia" + ], + "test_devices": { + "nvidia": "cuda" + } +} diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py index 7e33d68e..422d2b32 100644 --- a/scripts/generate_wrappers.py +++ b/scripts/generate_wrappers.py @@ -923,12 +923,15 @@ def _append_optional_params(prefix, params): return declarations, definitions -def _generate_generated_dispatch_header(op_names, devices, declarations): +def _generate_generated_dispatch_header( + op_names, devices, declarations, plugin_registry=None +): header_base_includes = "\n".join( f'#include "base/{op_name}.h"' for op_name in op_names ) header_device_includes = "\n".join( - f'#include "{path}"' for path in _device_marker_headers(devices) + f'#include "{path}"' + for path in _device_marker_headers(devices, plugin_registry) ) return f"""#ifndef INFINI_OPS_GENERATED_BINDINGS_GENERATED_DISPATCH_H_ @@ -1053,9 +1056,12 @@ def _generate_operator_call_instantiation_header(op_names, declarations): """ -def _generate_operator_call_instantiation_source(devices, impl_paths, definitions): +def _generate_operator_call_instantiation_source( + devices, impl_paths, definitions, plugin_registry=None +): device_includes = "\n".join( - f'#include "{path}"' for path in _device_marker_headers(devices) + f'#include "{path}"' + for path in _device_marker_headers(devices, plugin_registry) ) impl_includes = "\n".join( f'#include "{_to_include_path(impl_path)}"' for impl_path in impl_paths @@ -1076,7 +1082,12 @@ def _generate_operator_call_instantiation_source(devices, impl_paths, definition """ -def _device_marker_headers(devices): +def _device_marker_headers(devices, plugin_registry=None): + if plugin_registry is not None: + paths = 
plugin_registry.get("device_headers", {}) + + return [paths[device] for device in devices if device in paths] + paths = { "cpu": "native/cpu/device_.h", "nvidia": "native/cuda/nvidia/device_.h", @@ -1085,6 +1096,7 @@ def _device_marker_headers(devices): "metax": "native/cuda/metax/device_.h", "moore": "native/cuda/moore/device_.h", "iluvatar": "native/cuda/iluvatar/device_.h", + "hygon": "native/cuda/hygon/device_.h", } return [paths[device] for device in devices if device in paths] @@ -1118,7 +1130,7 @@ def _matches_scan_dir(impl_path, scan_dirs): ) -def _index_impl_headers(impl_roots, scan_dirs): +def _index_impl_headers(impl_roots, scan_dirs=None): """Index implementation headers by base operator class name. The previous implementation scanned every implementation header once per @@ -1130,7 +1142,7 @@ def _index_impl_headers(impl_roots, scan_dirs): for impl_root in impl_roots: for impl_path in impl_root.rglob("*.h"): - if not _matches_scan_dir(impl_path, scan_dirs): + if scan_dirs is not None and not _matches_scan_dir(impl_path, scan_dirs): continue text = impl_path.read_text() @@ -1141,7 +1153,9 @@ def _index_impl_headers(impl_roots, scan_dirs): return by_operator -def _get_all_ops(devices, with_torch=False, with_ninetoothed=False): +def _get_all_ops( + devices, with_torch=False, with_ninetoothed=False, plugin_registry=None +): scan_dirs = set(devices) if with_torch: @@ -1161,12 +1175,24 @@ def _get_all_ops(devices, with_torch=False, with_ninetoothed=False): if with_torch and _GENERATED_BASE_DIR.exists(): base_dirs.append(_GENERATED_BASE_DIR) - impl_roots = [_SRC_DIR] + if plugin_registry is not None and plugin_registry.get("operator_roots"): + impl_roots = [pathlib.Path(root) for root in plugin_registry["operator_roots"]] + impl_scan_dirs = None + else: + impl_roots = [_SRC_DIR] + impl_scan_dirs = scan_dirs if with_torch and (_GENERATION_DIR / "torch").exists(): impl_roots.append(_GENERATION_DIR) + if impl_scan_dirs is not None: + impl_scan_dirs.add("torch") + 
+ if with_ninetoothed: + impl_roots.append(_SRC_DIR / "ninetoothed") + if impl_scan_dirs is not None: + impl_scan_dirs.add("ninetoothed") - impl_headers_by_operator = _index_impl_headers(impl_roots, scan_dirs) + impl_headers_by_operator = _index_impl_headers(impl_roots, impl_scan_dirs) for base_dir in base_dirs: for file_path in base_dir.iterdir(): @@ -1258,7 +1284,7 @@ def _dispatch_gen_batch_size(): parser.add_argument( "--devices", nargs="+", - default="cpu", + default=["cpu"], type=str, help="Devices to use. Please pick from `cpu`, `nvidia`, `cambricon`, `ascend`, `metax`, `moore`, `iluvatar`, `kunlun`, `hygon`, and `qy`. (default: `cpu`)", ) @@ -1274,7 +1300,19 @@ def _dispatch_gen_batch_size(): help="Include NineToothed backend implementations.", ) + parser.add_argument( + "--plugin-registry", + type=pathlib.Path, + default=None, + help="Path to a build-time plugin registry JSON file.", + ) + args = parser.parse_args() + plugin_registry = None + if args.plugin_registry is not None: + plugin_registry = json.loads(args.plugin_registry.read_text()) + if "devices" in plugin_registry: + args.devices = plugin_registry["devices"] # Wipe previous outputs so files for ops that have since been removed # from the active set (e.g. 
when toggling `--with-torch`) do not linger @@ -1294,6 +1332,7 @@ def _dispatch_gen_batch_size(): args.devices, with_torch=args.with_torch, with_ninetoothed=args.with_ninetoothed, + plugin_registry=plugin_registry, ) bind_func_names = [] @@ -1342,7 +1381,7 @@ def _dispatch_gen_batch_size(): bind_func_names.append(bind_func_name) dispatch_header = _generate_generated_dispatch_header( - op_names, args.devices, dispatch_declarations + op_names, args.devices, dispatch_declarations, plugin_registry ) (_BINDINGS_DIR / "generated_dispatch.h").write_text(dispatch_header) @@ -1384,6 +1423,7 @@ def _dispatch_gen_batch_size(): args.devices, impl_paths, call_instantiation_definitions, + plugin_registry, ) ( _GENERATED_SRC_DIR diff --git a/scripts/infini_ops_plugin_registry.py b/scripts/infini_ops_plugin_registry.py new file mode 100644 index 00000000..b1b1fc7a --- /dev/null +++ b/scripts/infini_ops_plugin_registry.py @@ -0,0 +1,181 @@ +import json +import pathlib + +KNOWN_DEVICES = { + "cpu", + "nvidia", + "iluvatar", + "hygon", + "metax", + "moore", + "cambricon", + "ascend", +} + +REQUIRED_FIELDS = { + "name", + "kind", + "contract_version", + "devices", + "depends", + "cmake_entry", + "source_roots", + "operator_roots", + "device_headers", + "test_devices", +} + + +def _as_list(value, field, plugin_name): + if not isinstance(value, list): + raise ValueError(f"plugin `{plugin_name}` field `{field}` must be a `list`") + + return value + + +def _as_dict(value, field, plugin_name): + if not isinstance(value, dict): + raise ValueError(f"plugin `{plugin_name}` field `{field}` must be a `dict`") + + return value + + +def _load_manifest(path): + data = json.loads(path.read_text(encoding="utf-8")) + missing = REQUIRED_FIELDS.difference(data) + + if missing: + raise ValueError( + f"plugin manifest `{path}` is missing required fields: " + f"{', '.join(sorted(missing))}" + ) + + name = data["name"] + + if name != path.parent.name: + raise ValueError( + f"plugin manifest `{path}` 
declares name `{name}`, " + f"expected `{path.parent.name}`" + ) + + if data["kind"] not in {"shared", "device"}: + raise ValueError(f"plugin `{name}` has invalid kind `{data['kind']}`") + + if data["contract_version"] != 1: + raise ValueError( + f"plugin `{name}` uses unsupported contract version " + f"`{data['contract_version']}`" + ) + + cmake_entry = data["cmake_entry"] + if not isinstance(cmake_entry, str) or not cmake_entry: + raise ValueError(f"plugin `{name}` field `cmake_entry` must be a `string`") + + if not (path.parent / cmake_entry).is_file(): + raise ValueError( + f"plugin `{name}` `CMake` entry `{cmake_entry}` was not found" + ) + + devices = _as_list(data["devices"], "devices", name) + depends = _as_list(data["depends"], "depends", name) + _as_list(data["source_roots"], "source_roots", name) + _as_list(data["operator_roots"], "operator_roots", name) + device_headers = _as_dict(data["device_headers"], "device_headers", name) + test_devices = _as_dict(data["test_devices"], "test_devices", name) + + for device in devices: + if device not in KNOWN_DEVICES: + raise ValueError(f"plugin `{name}` declares unknown device `{device}`") + + for device in device_headers: + if device not in devices: + raise ValueError( + f"plugin `{name}` has device header for non-owned device `{device}`" + ) + + for device in test_devices: + if device not in devices: + raise ValueError( + f"plugin `{name}` has test device for non-owned device `{device}`" + ) + + if data["kind"] == "device" and not devices: + raise ValueError(f"device plugin `{name}` must declare at least one device") + + if data["kind"] == "shared" and devices: + raise ValueError(f"shared plugin `{name}` must not declare devices") + + for dependency in depends: + if not isinstance(dependency, str): + raise ValueError(f"plugin `{name}` dependency names must be `string`s") + + return data + + +def _append_unique(values, new_values): + for value in new_values: + if value not in values: + values.append(value) + + 
def load_plugin_registry(plugin_root, requested_plugins):
    """Resolve the requested plugins and merge their manifests into a registry.

    Every `*/plugin.json` directly under `plugin_root` is loaded and validated
    with `_load_manifest`, including plugins that were not requested. The
    requested plugins are then expanded depth-first through their `depends`
    lists, so each plugin appears after all of its dependencies.

    Args:
        plugin_root: Directory whose immediate sub-directories hold plugins.
        requested_plugins: Iterable of plugin names to enable.

    Returns:
        A `dict` with keys `plugins` (dependency-ordered names), `devices`,
        `source_roots`, `operator_roots` (order-preserving, de-duplicated
        lists) and `device_headers`, `test_devices` (merged dicts; on a key
        clash the plugin later in `plugins` wins).

    Raises:
        ValueError: If a manifest is invalid, a requested plugin or one of
            its dependencies does not exist, or the dependencies form a cycle.
    """
    plugin_root = pathlib.Path(plugin_root)
    manifests = {
        path.parent.name: _load_manifest(path)
        for path in sorted(plugin_root.glob("*/plugin.json"))
    }

    ordered = []
    visiting = []  # Current DFS path; doubles as the cycle trace.
    visited = set()

    def visit(name):
        if name in visited:
            return

        if name in visiting:
            cycle = " -> ".join([*visiting, name])
            raise ValueError(f"plugin dependency cycle detected: {cycle}")

        if name not in manifests:
            # A non-empty DFS path means `name` was pulled in as a dependency
            # of `visiting[-1]`, not requested by the caller; say so instead
            # of blaming the request.
            if visiting:
                raise ValueError(
                    f"plugin `{name}` required by `{visiting[-1]}` was not found"
                )

            raise ValueError(f"requested plugin `{name}` was not found")

        visiting.append(name)
        for dependency in manifests[name]["depends"]:
            visit(dependency)
        visiting.pop()
        visited.add(name)
        ordered.append(name)

    for name in requested_plugins:
        visit(name)

    devices = []
    source_roots = []
    operator_roots = []
    device_headers = {}
    test_devices = {}

    for name in ordered:
        manifest = manifests[name]
        _append_unique(devices, manifest["devices"])
        _append_unique(source_roots, manifest["source_roots"])
        _append_unique(operator_roots, manifest["operator_roots"])
        device_headers.update(manifest["device_headers"])
        test_devices.update(manifest["test_devices"])

    return {
        "plugins": ordered,
        "devices": devices,
        "source_roots": source_roots,
        "operator_roots": operator_roots,
        "device_headers": device_headers,
        "test_devices": test_devices,
    }


def write_plugin_registry(path, registry):
    """Write `registry` to `path` as indented, key-sorted JSON with a trailing
    newline, creating missing parent directories first."""
    path = pathlib.Path(path)
    # The target may live in a build directory that has not been created yet.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(
        json.dumps(registry, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
CONFIGURE_DEPENDS ${CPU_PATTERNS}) - list(APPEND CORE_SOURCES ${CPU_SOURCES}) - - target_compile_definitions(infiniops PUBLIC WITH_CPU=1) - - find_package(OpenMP REQUIRED COMPONENTS CXX) - target_link_libraries(infiniops PRIVATE OpenMP::OpenMP_CXX) - - list(APPEND DEVICE_LIST "cpu") -endif() - -if(WITH_NVIDIA) - set(NVIDIA_PATTERNS - "native/cuda/*.cc" - "native/cuda/*.cpp" - "native/cuda/*.cu" - "native/cuda/nvidia/*.cc" - "native/cuda/nvidia/*.cpp" - "native/cuda/nvidia/*.cu" - ) - - file(GLOB_RECURSE NVIDIA_SOURCES CONFIGURE_DEPENDS ${NVIDIA_PATTERNS}) - - enable_language(CUDA) +set(INFINI_OPS_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +infini_ops_enable_requested_plugins() +infini_ops_get_enabled_devices(DEVICE_LIST) - target_compile_definitions(infiniops PUBLIC WITH_NVIDIA=1) - target_sources(infiniops PRIVATE ${NVIDIA_SOURCES}) - - find_package(CUDAToolkit REQUIRED) - target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas CUDA::cublasLt CUDA::cuda_driver) - - list(APPEND DEVICE_LIST "nvidia") - set_target_properties(infiniops PROPERTIES - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - ) -endif() +set(INFINI_OPS_PLUGIN_REGISTRY_PATH + "${CMAKE_CURRENT_BINARY_DIR}/infini_ops_plugin_registry.json") +infini_ops_write_plugin_registry("${INFINI_OPS_PLUGIN_REGISTRY_PATH}") if(WITH_NINETOOTHED) find_package(Python COMPONENTS Interpreter REQUIRED) @@ -84,229 +48,6 @@ if(WITH_NINETOOTHED) target_sources(infiniops PRIVATE ${INFINIOPS_NINETOOTHED_SOURCES}) endif() -if(WITH_ILUVATAR) - set(ILUVATAR_PATTERNS - "native/cuda/*.cc" - "native/cuda/*.cpp" - "native/cuda/*.cu" - "native/cuda/iluvatar/*.cc" - "native/cuda/iluvatar/*.cpp" - "native/cuda/iluvatar/*.cu" - ) - - file(GLOB_RECURSE ILUVATAR_SOURCES CONFIGURE_DEPENDS ${ILUVATAR_PATTERNS}) - - target_compile_definitions(infiniops PUBLIC WITH_ILUVATAR=1) - target_sources(infiniops PRIVATE ${ILUVATAR_SOURCES}) - - find_package(CUDAToolkit REQUIRED) - target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas 
CUDA::cuda_driver) - - list(APPEND DEVICE_LIST "iluvatar") -endif() - -if(WITH_HYGON) - set(HYGON_PATTERNS - "native/cuda/*.cc" - "native/cuda/*.cpp" - "native/cuda/*.cu" - "native/cuda/hygon/*.cc" - "native/cuda/hygon/*.cpp" - "native/cuda/hygon/*.cu" - ) - - file(GLOB_RECURSE HYGON_SOURCES CONFIGURE_DEPENDS ${HYGON_PATTERNS}) - - enable_language(CUDA) - - target_compile_definitions(infiniops PUBLIC WITH_HYGON=1) - target_sources(infiniops PRIVATE ${HYGON_SOURCES}) - - find_package(CUDAToolkit REQUIRED) - target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas) - - set_target_properties(infiniops PROPERTIES - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - ) - - list(APPEND DEVICE_LIST "hygon") -endif() - -if(WITH_METAX) - set(METAX_PATTERNS - "native/cuda/*.cc" - "native/cuda/*.cpp" - "native/cuda/metax/*.cc" - "native/cuda/metax/*.maca" - ) - - file(GLOB_RECURSE METAX_SOURCES CONFIGURE_DEPENDS ${METAX_PATTERNS}) - - set_source_files_properties(${METAX_SOURCES} PROPERTIES LANGUAGE CXX) - - target_compile_definitions(infiniops PRIVATE WITH_METAX=1) - target_compile_options(infiniops PRIVATE "-x" "maca") - target_sources(infiniops PRIVATE ${METAX_SOURCES}) - - target_include_directories(infiniops PUBLIC "${MACA_PATH}/include") - target_link_libraries(infiniops PUBLIC - ${MACA_RUNTIME_LIB} - ${MACA_DNN_LIB} - ${MACA_BLAS_LIB} - ) - - list(APPEND DEVICE_LIST "metax") -endif() - -if(WITH_MOORE) - set(MOORE_PATTERNS - "native/cuda/*.cc" - "native/cuda/*.cpp" - "native/cuda/moore/*.cc" - "native/cuda/moore/*.cpp" - "native/cuda/moore/*.mu" - ) - - file(GLOB_RECURSE MOORE_SOURCES CONFIGURE_DEPENDS ${MOORE_PATTERNS}) - - set_source_files_properties(${MOORE_SOURCES} PROPERTIES LANGUAGE CXX) - - target_compile_definitions(infiniops PRIVATE WITH_MOORE=1) - target_compile_options(infiniops PRIVATE "-x" "musa") - target_sources(infiniops PRIVATE ${MOORE_SOURCES}) - - target_include_directories(infiniops PUBLIC "${MUSA_ROOT}/include") - target_link_libraries(infiniops 
PUBLIC ${MUSA_LIB} ${MUSART_LIB} ${MUBLAS_LIB}) - - list(APPEND DEVICE_LIST "moore") -endif() - -if(WITH_CAMBRICON) - file(GLOB_RECURSE CAMBRICON_MLU_SOURCES CONFIGURE_DEPENDS "native/cambricon/ops/*/*.mlu") - find_program(CNCC_COMPILER cncc HINTS "${NEUWARE_HOME}/bin" "$ENV{NEUWARE_HOME}/bin" /usr/local/neuware/bin) - if(CNCC_COMPILER) - message(STATUS "Found cncc: ${CNCC_COMPILER}") - set(MLU_COMPILE_OPTS - -c --bang-mlu-arch=mtp_592 -O3 -fPIC -Wall -Werror -std=c++17 -pthread - -I${CMAKE_CURRENT_SOURCE_DIR} -I${NEUWARE_HOME}/include - -idirafter /usr/local/neuware/lib/clang/11.1.0/include - ) - function(compile_mlu_file src_file) - get_filename_component(name ${src_file} NAME_WE) - get_filename_component(path ${src_file} DIRECTORY) - set(out_file "${CMAKE_CURRENT_BINARY_DIR}/${path}/${name}.o") - file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${path}") - add_custom_command(OUTPUT ${out_file} - COMMAND ${CNCC_COMPILER} ${MLU_COMPILE_OPTS} -c ${src_file} -o ${out_file} - DEPENDS ${src_file} - COMMENT "Building MLU kernel: ${src_file}" - ) - set_property(DIRECTORY APPEND PROPERTY CAMBRICON_OBJECTS ${out_file}) - endfunction() - foreach(src ${CAMBRICON_MLU_SOURCES}) - compile_mlu_file(${src}) - endforeach() - get_directory_property(CAMBRICON_OBJECT_FILES CAMBRICON_OBJECTS) - if(CAMBRICON_OBJECT_FILES) - target_sources(infiniops PRIVATE ${CAMBRICON_OBJECT_FILES}) - endif() - else() - message(WARNING "cncc compiler not found. 
MLU kernels will not be compiled.") - endif() - target_compile_definitions(infiniops PRIVATE WITH_CAMBRICON=1) - - target_include_directories(infiniops PUBLIC "${NEUWARE_HOME}/include") - target_link_libraries(infiniops PUBLIC ${CAMBRICON_RUNTIME_LIB} ${CAMBRICON_CNNL_LIB} ${CAMBRICON_CNNL_EXTRA_LIB} ${CAMBRICON_PAPI_LIB}) - - if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") - target_compile_options(infiniops PUBLIC - "$<$:SHELL:-idirafter /usr/local/neuware/lib/clang/11.1.0/include>" - ) - endif() - - list(APPEND DEVICE_LIST "cambricon") -endif() - -if(WITH_ASCEND) - # ASCEND_HOME is set by the top-level CMakeLists.txt. - file(GLOB_RECURSE ASCEND_SOURCES CONFIGURE_DEPENDS - "native/ascend/*.cc" - "native/ascend/*.cpp" - ) - # Exclude `kernel_impl.cpp` — `AscendC` device code, not compiled by the host C++ compiler. - list(FILTER ASCEND_SOURCES EXCLUDE REGEX ".*kernel_impl\\.cpp$") - # Exclude custom/ — standalone PyTorch extension, built separately. - list(FILTER ASCEND_SOURCES EXCLUDE REGEX ".*/custom/.*") - - target_compile_definitions(infiniops PUBLIC WITH_ASCEND=1) - target_sources(infiniops PRIVATE ${ASCEND_SOURCES}) - - # Resolve the driver lib dir two levels above the toolkit root. - get_filename_component(ASCEND_ROOT "${ASCEND_HOME}/../.." ABSOLUTE) - - # Prefer the real driver HAL; fall back to the toolkit stub for build-only - # environments (e.g., Docker CI images without hardware drivers installed). - # CANN <= 8.0: stub at runtime/lib64/stub/; CANN >= 8.5: devlib/-linux/devlib/. 
- set(ASCEND_HAL_REAL "${ASCEND_ROOT}/driver/lib64/driver/libascend_hal.so") - set(ASCEND_HAL_STUB "${ASCEND_HOME}/runtime/lib64/stub/libascend_hal.so") - set(ASCEND_HAL_DEVLIB "${ASCEND_HOME}/${CMAKE_SYSTEM_PROCESSOR}-linux/devlib/libascend_hal.so") - if(EXISTS "${ASCEND_HAL_REAL}") - set(ASCEND_HAL_LIB "${ASCEND_HAL_REAL}") - elseif(EXISTS "${ASCEND_HAL_STUB}") - set(ASCEND_HAL_LIB "${ASCEND_HAL_STUB}") - message(STATUS "ascend_hal: driver not found, using stub for linking") - elseif(EXISTS "${ASCEND_HAL_DEVLIB}") - set(ASCEND_HAL_LIB "${ASCEND_HAL_DEVLIB}") - message(STATUS "ascend_hal: driver not found, using devlib for linking") - else() - message(FATAL_ERROR "libascend_hal.so not found (tried ${ASCEND_HAL_REAL}, ${ASCEND_HAL_STUB}, and ${ASCEND_HAL_DEVLIB})") - endif() - - target_include_directories(infiniops PUBLIC - "${ASCEND_HOME}/include" - "${ASCEND_HOME}/include/aclnn" - "${ASCEND_HOME}/include/aclnnop") - target_link_libraries(infiniops PUBLIC - "${ASCEND_HOME}/lib64/libascendcl.so" - "${ASCEND_HOME}/lib64/libnnopbase.so" - "${ASCEND_HOME}/lib64/libopapi.so" - "${ASCEND_HAL_LIB}") - - # ATB (Ascend Transformer Boost) — provides fused operators like - # `PagedAttention` and `ReshapeAndCache` that are graph-capture safe. - set(ATB_HOME_DIR "$ENV{ATB_HOME_PATH}") - if(NOT ATB_HOME_DIR) - # Default search path under CANN nnal directory. 
- file(GLOB ATB_SEARCH_DIRS "/usr/local/Ascend/nnal/atb/*/atb/cxx_abi_1") - if(ATB_SEARCH_DIRS) - list(SORT ATB_SEARCH_DIRS ORDER DESCENDING) - list(GET ATB_SEARCH_DIRS 0 ATB_HOME_DIR) - endif() - endif() - - if(ATB_HOME_DIR AND EXISTS "${ATB_HOME_DIR}/include/atb/operation.h") - message(STATUS "ATB found: ${ATB_HOME_DIR}") - target_compile_definitions(infiniops PUBLIC INFINI_HAS_ATB=1) - target_include_directories(infiniops PUBLIC "${ATB_HOME_DIR}/include") - target_link_libraries(infiniops PUBLIC "${ATB_HOME_DIR}/lib/libatb.so") - else() - message(STATUS "ATB not found — ATB-based operators disabled") - endif() - - list(APPEND DEVICE_LIST "ascend") - - # Custom `AscendC` kernels (PyTorch extension, requires `torch_npu`). - if(BUILD_CUSTOM_KERNEL) - add_subdirectory(native/ascend/custom) - - # Link the compiled `AscendC` kernel objects into `infiniops` so that - # custom kernel implementations (e.g. `RmsNorm` index 1) can call - # them via the generated launch functions. - target_compile_definitions(infiniops PUBLIC INFINI_HAS_CUSTOM_KERNELS=1) - endif() -endif() - if(WITH_TORCH) # Auto-generate ATen-backed operator wrappers from `scripts/torch_ops.yaml`. # The script writes into `${PROJECT_SOURCE_DIR}/generated/` (gitignored), @@ -517,7 +258,10 @@ if(GENERATE_OPERATOR_CALL_INSTANTIATIONS OR GENERATE_PYTHON_BINDINGS) # specializations for enabled backends, causing link-time or runtime # failures. 
import importlib.util
import json
import pathlib
import sys
import types


# ---------------------------------------------------------------------------
# tests/test_generate_wrappers_plugins.py
# ---------------------------------------------------------------------------


def _load_wrapper_module():
    """Import `scripts/generate_wrappers.py` from its file path.

    Minimal stand-ins for `clang` and `clang.cindex` are registered in
    `sys.modules` unless modules of those names are already present.
    """
    script = pathlib.Path(__file__).resolve().parents[1].joinpath(
        "scripts", "generate_wrappers.py"
    )
    spec = importlib.util.spec_from_file_location(
        "generate_wrappers_under_test", script
    )
    wrapper = importlib.util.module_from_spec(spec)
    assert spec.loader is not None

    cindex_stub = types.ModuleType("clang.cindex")
    cindex_stub.CursorKind = types.SimpleNamespace(
        CONSTRUCTOR=object(),
        CXX_METHOD=object(),
    )
    clang_stub = types.ModuleType("clang")
    clang_stub.cindex = cindex_stub
    for stub_name, stub in (("clang", clang_stub), ("clang.cindex", cindex_stub)):
        sys.modules.setdefault(stub_name, stub)

    sys.modules[spec.name] = wrapper
    spec.loader.exec_module(wrapper)

    return wrapper


def test_device_marker_headers_come_from_plugin_registry():
    """Marker headers are looked up in the registry's `device_headers`."""
    wrapper = _load_wrapper_module()
    headers = wrapper._device_marker_headers(
        ["nvidia"],
        {
            "device_headers": {
                "cpu": "native/cpu/device_.h",
                "nvidia": "plugins/nvidia/include/device_.h",
            },
        },
    )

    assert headers == ["plugins/nvidia/include/device_.h"]


def test_get_all_ops_scans_plugin_operator_roots(tmp_path, monkeypatch):
    """Implementation headers are discovered under registry `operator_roots`."""
    wrapper = _load_wrapper_module()
    operator_root = tmp_path / "plugin" / "ops"
    base_dir = tmp_path / "base"
    demo_dir = operator_root / "demo"
    base_dir.mkdir()
    demo_dir.mkdir(parents=True)

    (base_dir / "demo.h").write_text("class Demo {};\n", encoding="utf-8")
    impl_header = demo_dir / "kernel.h"
    # NOTE(review): this header text may have lost a template argument list
    # when the patch was extracted - confirm against the repository.
    impl_header.write_text(
        "namespace infini::ops { class Operator {}; }\n",
        encoding="utf-8",
    )

    monkeypatch.setattr(wrapper, "_BASE_DIR", base_dir)
    monkeypatch.setattr(wrapper, "_GENERATED_BASE_DIR", tmp_path / "generated" / "base")

    discovered = wrapper._get_all_ops(
        ["cpu"],
        plugin_registry={"operator_roots": [str(operator_root)]},
    )

    assert discovered == {"demo": [impl_header]}


# ---------------------------------------------------------------------------
# tests/test_plugin_registry.py
# ---------------------------------------------------------------------------


def _load_registry_module():
    """Import `scripts/infini_ops_plugin_registry.py` from its file path."""
    script = pathlib.Path(__file__).resolve().parents[1].joinpath(
        "scripts", "infini_ops_plugin_registry.py"
    )
    spec = importlib.util.spec_from_file_location("infini_ops_plugin_registry", script)
    registry_module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    sys.modules[spec.name] = registry_module
    spec.loader.exec_module(registry_module)

    return registry_module


def _write_manifest(root, name, data, create_cmake_entry=True):
    """Create `root/name/plugin.json` from `data` and return its path.

    The file named by `data["cmake_entry"]` is created next to it unless
    `create_cmake_entry` is false or the entry is missing/empty.
    """
    plugin_dir = root / name
    plugin_dir.mkdir()
    manifest_path = plugin_dir / "plugin.json"
    manifest_path.write_text(json.dumps(data), encoding="utf-8")

    entry = data.get("cmake_entry")
    if create_cmake_entry and entry:
        (plugin_dir / data["cmake_entry"]).write_text(
            "# test plugin entry\n", encoding="utf-8"
        )

    return manifest_path


def _manifest(name, kind, **fields):
    """Build a complete manifest dict; `fields` override the empty defaults."""
    manifest = {
        "name": name,
        "kind": kind,
        "contract_version": 1,
        "devices": [],
        "depends": [],
        "cmake_entry": "plugin.cmake",
        "source_roots": [],
        "operator_roots": [],
        "device_headers": {},
        "test_devices": {},
    }
    manifest.update(fields)

    return manifest


def _new_plugin_root(tmp_path):
    """Create and return an empty `plugins` directory under `tmp_path`."""
    plugin_root = tmp_path / "plugins"
    plugin_root.mkdir()

    return plugin_root


def _assert_rejected(module, plugin_root, requested, fragment, failure_message):
    """Assert that loading `requested` raises `ValueError` mentioning `fragment`."""
    try:
        module.load_plugin_registry(plugin_root, requested)
    except ValueError as exc:
        assert fragment in str(exc)
        return

    raise AssertionError(failure_message)


def test_load_plugins_orders_dependencies_and_merges_device_metadata(tmp_path):
    """Dependencies come first and per-device metadata is merged."""
    module = _load_registry_module()
    plugin_root = _new_plugin_root(tmp_path)
    _write_manifest(
        plugin_root,
        "cuda-common",
        _manifest(
            "cuda-common",
            "shared",
            source_roots=["src/native/cuda"],
            operator_roots=["src/native/cuda/ops"],
        ),
    )
    _write_manifest(
        plugin_root,
        "nvidia",
        _manifest(
            "nvidia",
            "device",
            devices=["nvidia"],
            depends=["cuda-common"],
            source_roots=["src/native/cuda/nvidia"],
            operator_roots=["src/native/cuda/nvidia/ops"],
            device_headers={"nvidia": "native/cuda/nvidia/device_.h"},
            test_devices={"nvidia": "cuda"},
        ),
    )

    registry = module.load_plugin_registry(plugin_root, ["nvidia"])

    assert registry["plugins"] == ["cuda-common", "nvidia"]
    assert registry["devices"] == ["nvidia"]
    assert registry["device_headers"] == {
        "nvidia": "native/cuda/nvidia/device_.h",
    }
    assert registry["operator_roots"] == [
        "src/native/cuda/ops",
        "src/native/cuda/nvidia/ops",
    ]
    assert registry["test_devices"] == {"nvidia": "cuda"}


def test_load_plugins_rejects_missing_cmake_entry(tmp_path):
    """A manifest whose `cmake_entry` file is absent is rejected."""
    module = _load_registry_module()
    plugin_root = _new_plugin_root(tmp_path)
    _write_manifest(
        plugin_root,
        "cpu",
        _manifest(
            "cpu",
            "device",
            devices=["cpu"],
            source_roots=["src/native/cpu"],
            operator_roots=["src/native/cpu/ops"],
            device_headers={"cpu": "native/cpu/device_.h"},
            test_devices={"cpu": "cpu"},
        ),
        create_cmake_entry=False,
    )

    _assert_rejected(
        module,
        plugin_root,
        ["cpu"],
        "`CMake` entry",
        "manifest with missing `CMake` entry should be rejected",
    )


def test_load_plugins_rejects_unknown_devices(tmp_path):
    """A manifest naming a device outside the known set is rejected."""
    module = _load_registry_module()
    plugin_root = _new_plugin_root(tmp_path)
    _write_manifest(
        plugin_root,
        "unknown",
        _manifest(
            "unknown",
            "device",
            devices=["unknown"],
            device_headers={"unknown": "native/unknown/device_.h"},
            test_devices={"unknown": "unknown"},
        ),
    )

    _assert_rejected(
        module,
        plugin_root,
        ["unknown"],
        "unknown device",
        "unknown device manifest should be rejected",
    )


def test_load_plugins_rejects_dependency_cycles(tmp_path):
    """Two plugins depending on each other are reported as a cycle."""
    module = _load_registry_module()
    plugin_root = _new_plugin_root(tmp_path)
    for plugin_name, dependency in (("a", "b"), ("b", "a")):
        _write_manifest(
            plugin_root,
            plugin_name,
            _manifest(plugin_name, "shared", depends=[dependency]),
        )

    _assert_rejected(
        module,
        plugin_root,
        ["a"],
        "dependency cycle",
        "cyclic plugin dependencies should be rejected",
    )


def test_builtin_plugin_manifests_load_individually():
    """Each built-in device plugin loads on its own and owns its device."""
    module = _load_registry_module()
    plugin_root = pathlib.Path(__file__).resolve().parents[1] / "plugins"
    builtin_devices = (
        "cpu",
        "nvidia",
        "iluvatar",
        "hygon",
        "metax",
        "moore",
        "cambricon",
        "ascend",
    )

    for plugin_name in builtin_devices:
        registry = module.load_plugin_registry(plugin_root, [plugin_name])

        assert plugin_name in registry["plugins"]
        assert registry["devices"] == [plugin_name]
        assert plugin_name in registry["device_headers"]
        assert plugin_name in registry["test_devices"]


def test_builtin_plugin_manifests_cover_cpu_and_cuda_common_dependencies():
    """`cpu` stands alone; `nvidia` pulls in `cuda-common` ahead of itself."""
    module = _load_registry_module()
    plugin_root = pathlib.Path(__file__).resolve().parents[1] / "plugins"

    cpu_registry = module.load_plugin_registry(plugin_root, ["cpu"])
    nvidia_registry = module.load_plugin_registry(plugin_root, ["nvidia"])

    assert cpu_registry["plugins"] == ["cpu"]
    assert cpu_registry["devices"] == ["cpu"]
    assert cpu_registry["device_headers"] == {"cpu": "native/cpu/device_.h"}
    assert nvidia_registry["plugins"][:2] == ["cuda-common", "nvidia"]
    assert nvidia_registry["devices"] == ["nvidia"]
    assert "src/native/cuda/ops" in nvidia_registry["operator_roots"]
    assert "src/native/cuda/nvidia/ops" in nvidia_registry["operator_roots"]