reduce_sparse_csr_dim01_cpu_template Function — PyTorch Architecture
Architecture documentation for the reduce_sparse_csr_dim01_cpu_template function template in SparseCsrTensorMath.cpp from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp lines 1238–1292
// Reduce a sparse CSR tensor over both dimensions (dim=(0,1)) on CPU.
// Applies `rop` across every stored value and returns a 1 x min(1, ncols)
// sparse CSR tensor that holds at most one stored element (zero when the
// input has no values at all).
template <typename scalar_t, typename ReductionOp>
Tensor reduce_sparse_csr_dim01_cpu_template(const Tensor& sparse, ReductionOp rop) {
  auto index_options = sparse.col_indices().options();
  Tensor values = sparse.values();
  auto numel = values.numel();
  // The result stores a single element, or none when the input is empty.
  auto out_nnz = std::min<int64_t>(1, numel);
  /* TODO: we can likely do about 3x better than parallel_reduce:
  In [2]: t=torch.randn(5000, 5000).to_sparse_csr()
  In [3]: %timeit torch._sparse_csr_sum(t, dim=(0, 1), keepdim=True)
  3.39 ms ± 898 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)
  In [4]: %timeit torch.sum(t.values())
  1.07 ms ± 291 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
  */
  // Pass is_cuda=true to acc_type on purpose: the CPU mapping would promote
  // float to double for accumulation, while here float should accumulate as
  // float (the CUDA mapping gives exactly that).
  using acc_t = at::acc_type<scalar_t, true>;
  const scalar_t* const data = values.data_ptr<scalar_t>();
  acc_t reduced = at::parallel_reduce(
      0,
      numel,
      internal::GRAIN_SIZE,
      rop.identity(),
      [&](int64_t begin, int64_t end, scalar_t identity) {
        acc_t partial = acc_t(identity);
        for (int64_t i = begin; i < end; ++i) {
          partial = rop(partial, acc_t(data[i]));
        }
        return partial;
      },
      rop);
  Tensor out_col_indices = at::zeros({out_nnz}, index_options);
  Tensor out_crow_indices = at::tensor(ArrayRef<int64_t>{0, out_nnz}, index_options);
  // Integral value types (including bool) produce a Long-typed result.
  auto result_dtype = at::isIntegralType(values.scalar_type(), /*includeBool=*/true)
      ? ScalarType::Long
      : values.scalar_type();
  Tensor out_values;
  if (numel > 0) {
    out_values = at::empty({1}, values.options().dtype(result_dtype));
    out_values.fill_(reduced);
  } else {
    // No stored values: emit an empty-shape values tensor.
    out_values = at::empty({}, values.options().dtype(result_dtype));
  }
  return at::native::_sparse_csr_tensor_unsafe(
      out_crow_indices,
      out_col_indices,
      out_values,
      {1, std::min<int64_t>(1, sparse.size(1))},
      out_values.scalar_type(),
      sparse.layout(),
      out_values.device());
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free