Indexing cuda (#235)

* Allow using uint8_t for indexing.

* Revert the default cuda feature.

* Add a cuda-kernel for index-select.

* Add a test for gather.
This commit is contained in:
Laurent Mazare
2023-07-24 20:22:47 +01:00
committed by GitHub
parent b50f932e7c
commit 581b104f97
3 changed files with 277 additions and 31 deletions

View File

@@ -316,10 +316,7 @@ fn cmp(device: &Device) -> Result<()> {
Ok(())
}
#[test]
fn index_select() -> Result<()> {
// TODO: Test on cuda once the kernel is available.
let device = &Device::Cpu;
fn index_select(device: &Device) -> Result<()> {
let ids = Tensor::new(&[0u32, 2u32, 1u32], device)?;
let t = Tensor::arange(0f32, 12f32, device)?.reshape((4, 3))?;
assert_eq!(
@@ -349,6 +346,38 @@ fn index_select() -> Result<()> {
Ok(())
}
/// Exercises `Tensor::gather` on `device` against a 4x3 tensor holding 0..12,
/// gathering along dimension 1 and then along dimension 0.
fn gather(device: &Device) -> Result<()> {
    // One index per row, picking a single column out of each row.
    let single_col = Tensor::new(&[[0u32], [2], [1], [0]], device)?;
    let src = Tensor::arange(0.0f32, 12.0f32, device)?.reshape((4, 3))?;

    // Sanity-check the source layout before gathering from it.
    let src_rows = src.to_vec2::<f32>()?;
    assert_eq!(
        src_rows,
        &[
            [0.0, 1.0, 2.0],
            [3.0, 4.0, 5.0],
            [6.0, 7.0, 8.0],
            [9.0, 10.0, 11.0]
        ]
    );

    // Dimension 1, one pick per row.
    let picked = src.gather(&single_col, 1)?.to_vec2::<f32>()?;
    assert_eq!(picked, &[[0.0], [5.0], [7.0], [9.0]]);

    // Dimension 1, two picks per row (repeated indices are allowed).
    let pair_cols = Tensor::new(&[[0u32, 0], [2, 0], [1, 1], [0, 2]], device)?;
    let picked = src.gather(&pair_cols, 1)?.to_vec2::<f32>()?;
    assert_eq!(picked, &[[0.0, 0.0], [5.0, 3.0], [7.0, 7.0], [9.0, 11.0]]);

    // Dimension 0, a single output row: each column chooses its own source row.
    let one_row = Tensor::new(&[[0u32, 2, 0]], device)?;
    let picked = src.gather(&one_row, 0)?.to_vec2::<f32>()?;
    assert_eq!(picked, &[[0.0, 7.0, 2.0]]);

    // Dimension 0, two output rows.
    let two_rows = Tensor::new(&[[0u32, 2, 0], [0, 1, 1]], device)?;
    let picked = src.gather(&two_rows, 0)?.to_vec2::<f32>()?;
    assert_eq!(picked, &[[0.0, 7.0, 2.0], [0.0, 4.0, 5.0]]);

    Ok(())
}
fn matmul(device: &Device) -> Result<()> {
let data = vec![1.0f32, 2.0, 3.0, 4.0];
let a = Tensor::from_slice(&data, (2, 2), device)?;
@@ -513,3 +542,5 @@ test_device!(embeddings, embeddings_cpu, embeddings_gpu);
// Register each device-generic test body (a `fn name(device: &Device) -> Result<()>`)
// through `test_device!`. Judging by the argument names, the macro presumably emits one
// CPU test and one GPU test per function; it is defined outside this excerpt, so confirm
// there.
test_device!(cmp, cmp_cpu, cmp_gpu);
test_device!(matmul, matmul_cpu, matmul_gpu);
test_device!(broadcasting, broadcasting_cpu, broadcasting_gpu);
test_device!(index_select, index_select_cpu, index_select_gpu);
test_device!(gather, gather_cpu, gather_gpu);