Simd support (#448)

* Import the simd intrinsics in candle-core.

* simd version of reduce-sum.

* Bugfix.

* Fix some clippy lints.
This commit is contained in:
Laurent Mazare
2023-08-15 09:50:38 +01:00
committed by GitHub
parent 90374097dc
commit 495e0b7580
10 changed files with 487 additions and 14 deletions

View File

@@ -1051,7 +1051,7 @@ impl<'a> Map2 for Conv1D<'a> {
let num_threads = crate::utils::get_num_threads();
for offset in 0..p.k_size {
-crate::cpu_kernels::par_range(0, p.c_out, num_threads, |dst_c_idx| {
+crate::cpu::kernels::par_range(0, p.c_out, num_threads, |dst_c_idx| {
let dst_idx = dst_c_idx * l_out;
let k_cont = (0..p.c_in)
.map(|c_in_idx| k[dst_c_idx * k_s0 + c_in_idx * k_s1 + offset * k_s2])
@@ -1123,7 +1123,7 @@ impl<'a> Map2 for Conv2D<'a> {
for offset_h in 0..p.k_h {
for offset_w in 0..p.k_w {
-crate::cpu_kernels::par_range(0, p.c_out, num_threads, |dst_c_idx| {
+crate::cpu::kernels::par_range(0, p.c_out, num_threads, |dst_c_idx| {
let dst_idx = dst_c_idx * out_w * out_h;
let k_cont = (0..p.c_in)
.map(|c_in_idx| {