mirror of
https://github.com/huggingface/candle.git
synced 2025-06-19 03:54:56 +00:00
Indexing cuda (#235)
* Allow using uint8_t for indexing.
* Revert the default cuda feature.
* Add a cuda-kernel for index-select.
* Add a test for gather.
This commit is contained in:
@ -316,10 +316,7 @@ fn cmp(device: &Device) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn index_select() -> Result<()> {
|
||||
// TODO: Test on cuda once the kernel is available.
|
||||
let device = &Device::Cpu;
|
||||
fn index_select(device: &Device) -> Result<()> {
|
||||
let ids = Tensor::new(&[0u32, 2u32, 1u32], device)?;
|
||||
let t = Tensor::arange(0f32, 12f32, device)?.reshape((4, 3))?;
|
||||
assert_eq!(
|
||||
@ -349,6 +346,38 @@ fn index_select() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn gather(device: &Device) -> Result<()> {
|
||||
let ids = Tensor::new(&[[0u32], [2u32], [1u32], [0u32]], device)?;
|
||||
let t = Tensor::arange(0f32, 12f32, device)?.reshape((4, 3))?;
|
||||
assert_eq!(
|
||||
t.to_vec2::<f32>()?,
|
||||
&[
|
||||
[0.0, 1.0, 2.0],
|
||||
[3.0, 4.0, 5.0],
|
||||
[6.0, 7.0, 8.0],
|
||||
[9.0, 10.0, 11.0]
|
||||
]
|
||||
);
|
||||
let hs = t.gather(&ids, 1)?;
|
||||
assert_eq!(hs.to_vec2::<f32>()?, &[[0.0], [5.0], [7.0], [9.0]]);
|
||||
let ids = Tensor::new(
|
||||
&[[0u32, 0u32], [2u32, 0u32], [1u32, 1u32], [0u32, 2u32]],
|
||||
device,
|
||||
)?;
|
||||
let hs = t.gather(&ids, 1)?;
|
||||
assert_eq!(
|
||||
hs.to_vec2::<f32>()?,
|
||||
&[[0.0, 0.0], [5.0, 3.0], [7.0, 7.0], [9.0, 11.0]]
|
||||
);
|
||||
let ids = Tensor::new(&[[0u32, 2u32, 0u32]], device)?;
|
||||
let hs = t.gather(&ids, 0)?;
|
||||
assert_eq!(hs.to_vec2::<f32>()?, &[[0.0, 7.0, 2.0]]);
|
||||
let ids = Tensor::new(&[[0u32, 2u32, 0u32], [0u32, 1u32, 1u32]], device)?;
|
||||
let hs = t.gather(&ids, 0)?;
|
||||
assert_eq!(hs.to_vec2::<f32>()?, &[[0.0, 7.0, 2.0], [0.0, 4.0, 5.0]]);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn matmul(device: &Device) -> Result<()> {
|
||||
let data = vec![1.0f32, 2.0, 3.0, 4.0];
|
||||
let a = Tensor::from_slice(&data, (2, 2), device)?;
|
||||
@ -513,3 +542,5 @@ test_device!(embeddings, embeddings_cpu, embeddings_gpu);
|
||||
// NOTE(review): `test_device!` is defined outside this view. Presumably each
// invocation wraps the named function in one CPU test and one GPU test with
// the given names — confirm against the macro definition.
test_device!(cmp, cmp_cpu, cmp_gpu);
|
||||
test_device!(matmul, matmul_cpu, matmul_gpu);
|
||||
test_device!(broadcasting, broadcasting_cpu, broadcasting_gpu);
|
||||
test_device!(index_select, index_select_cpu, index_select_gpu);
|
||||
test_device!(gather, gather_cpu, gather_gpu);
|
||||
|
Reference in New Issue
Block a user