Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ __device__ __forceinline__ float warpReduceMax(float x) {
}

__device__ __forceinline__ unsigned int cvtaToShared(const void *ptr) {
#if defined(__CUDA_ARCH__) && defined(__cvta_generic_to_shared)
return static_cast<unsigned int>(__cvta_generic_to_shared(ptr));
#else
return static_cast<unsigned int>(reinterpret_cast<uintptr_t>(ptr));
#endif
}

__device__ __forceinline__ void cpAsyncCaSharedGlobal16(void *dst_shared, const void *src_global) {
Expand Down
14 changes: 13 additions & 1 deletion src/infiniop/ops/paged_attention/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include "../../handle.h"
#include "infiniop/ops/paged_attention.h"

#ifdef ENABLE_NVIDIA_API
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_attention_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
Expand Down Expand Up @@ -36,6 +36,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionDescriptor(
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -57,6 +60,9 @@ __C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize(
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -82,6 +88,9 @@ __C infiniStatus_t infiniopPagedAttention(
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -102,6 +111,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionDescriptor(
#endif
#ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down
14 changes: 13 additions & 1 deletion src/infiniop/ops/paged_attention_prefill/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include "../../handle.h"
#include "infiniop/ops/paged_attention_prefill.h"

#ifdef ENABLE_NVIDIA_API
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_attention_prefill_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
Expand Down Expand Up @@ -38,6 +38,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor(
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -59,6 +62,9 @@ __C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize(
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down Expand Up @@ -87,6 +93,9 @@ __C infiniStatus_t infiniopPagedAttentionPrefill(
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -107,6 +116,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor(
#endif
#ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down
14 changes: 13 additions & 1 deletion src/infiniop/ops/paged_caching/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include "../../handle.h"
#include "infiniop/ops/paged_caching.h"

#ifdef ENABLE_NVIDIA_API
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_caching_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
Expand Down Expand Up @@ -31,6 +31,9 @@ __C infiniStatus_t infiniopCreatePagedCachingDescriptor(
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -52,6 +55,9 @@ __C infiniStatus_t infiniopGetPagedCachingWorkspaceSize(
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -77,6 +83,9 @@ __C infiniStatus_t infiniopPagedCaching(
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand All @@ -97,6 +106,9 @@ __C infiniStatus_t infiniopDestroyPagedCachingDescriptor(
#endif
#ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia)
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down
2 changes: 1 addition & 1 deletion test/infinicore/ops/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

import infinicore
import torch
from framework import BaseOperatorTest, TensorSpec, TestCase, GenericTestRunner
from framework.tensor import TensorInitializer
Expand All @@ -12,7 +13,6 @@
to_torch_dtype,
)

import infinicore

# ==============================================================================
# Operator-specific configuration
Expand Down
6 changes: 3 additions & 3 deletions xmake/iluvatar.lua
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ target("infiniop-iluvatar")
add_links("cudart", "cublas", "cudnn")

set_warnings("all", "error")
add_cuflags("-Wno-error=unused-private-field")
add_cuflags("-Wno-error=unused-private-field", "-Wno-error=unused-variable", "-Wno-unused-variable")
add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true})
if has_config("ivcore-20") then
add_cuflags("--cuda-gpu-arch=ivcore20", {force = true})
end
add_culdflags("-fPIC")
add_cxflags("-fPIC")
add_cxxflags("-fPIC")
add_cxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable")
add_cxxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable")

-- set_languages("cxx17") 天数似乎不能用这个配置
add_files("../src/infiniop/devices/nvidia/*.cu", "../src/infiniop/ops/*/nvidia/*.cu")
Expand Down