reduce_sparse_csr_dim1_cpu_template Function Template — pytorch Architecture
Architecture documentation for the reduce_sparse_csr_dim1_cpu_template function template in SparseCsrTensorMath.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp lines 1122–1236
// Reduce a 2D sparse CSR matrix along dim=1 (i.e. reduce each row) with the
// binary reduction `rop`, returning an (nrows, 1) sparse CSR tensor.
// Rows with no stored elements contribute no output element (no identity
// value for `rop` is assumed), so the result holds exactly one stored value
// per non-empty input row, all in column 0.
template <typename scalar_t, typename ReductionOp>
Tensor reduce_sparse_csr_dim1_cpu_template(const Tensor& sparse, ReductionOp rop) {
/*
Consider the following sparse tensor:
1 * * * *
* * * 2 *
* * 3 * *
* * * * *
4 * 5 * *
that has CSR representation
crow_indices = [0, 1, 2, 3, 3, 5]
col_indices = [0, 3, 2, 0, 2]
values = [1, 2, 3, 4, 5]
Reduction with dim=1 results:
1
2
3
*
rop(4, 5)
that has CSR representation
new_crow_indices = [0, 1, 2, 3, 3, 4]
new_col_indices = [0, 0, 0, 0]
new_values = [1, 2, 3, rop(4, 5)]
In general, the result CSR data can be computed as follows:
new_crow_indices = [0]
for i in range(1, nrows+1):
    # row i-1 is non-empty iff crow_indices[i] != crow_indices[i-1]
    new_crow_indices[i] = new_crow_indices[i-1] + (crow_indices[i] != crow_indices[i-1])
nnz = new_crow_indices[-1]
new_col_indices = zeros(nnz)
new_values.resize(nnz)
j = -1
for i in range(1, nrows+1):
    if crow_indices[i] == crow_indices[i-1]:
        continue  # empty row: contributes nothing to the output
    j += 1
    new_values[j] = rop(values[crow_indices[i-1] : crow_indices[i]])
*/
Tensor crow_indices = sparse.crow_indices();
auto ioptions = crow_indices.options();
Tensor values = sparse.values();
auto nrows = sparse.size(0);
// crow_indices has nrows+1 entries; the output row pointers share that length.
Tensor new_crow_indices = at::empty({crow_indices.numel()}, ioptions);
// Placeholder 0-dim tensor; resized to nnz once nnz is known below.
Tensor new_col_indices = at::empty({}, ioptions);
// row_map[i] = output slot (running non-empty-row count) for input row i;
// only written for non-empty rows.
Tensor row_map = at::empty({nrows}, ioptions);
// Set `is_cuda` = `true` in acc_type in CPU backend. Because the accumulate type
// of float should be float in current scenario. In CUDA, float is the accumulate type
// of float, while in CPU, double is the accumulate type of float.
using acc_t = at::acc_type<scalar_t, true>;
auto acc_buffer = at::sparse_csr::create_acc_buffer<acc_t, scalar_t>(
values.options(), values.scalar_type());
Tensor new_values = std::get<0>(acc_buffer);
Tensor new_values_acc = std::get<1>(acc_buffer);
AT_DISPATCH_INDEX_TYPES(crow_indices.scalar_type(), "reduce_sparse_csr_dim1_cpu_indices",
[&]() {
index_t* crow_indices_ptr = crow_indices.data_ptr<index_t>();
index_t* new_crow_indices_ptr = new_crow_indices.data_ptr<index_t>();
index_t* row_map_ptr = row_map.data_ptr<index_t>();
int64_t nnz = 0;
new_crow_indices_ptr[0] = 0;
// First pass (sequential): count non-empty rows, build the output row
// pointers, and record each non-empty row's destination slot in row_map.
for(int64_t i=0; i<nrows; i++) {
if (crow_indices_ptr[i] != crow_indices_ptr[i + 1]) {
row_map_ptr[i] = nnz;
nnz++;
}
new_crow_indices_ptr[i + 1] = nnz;
}
new_col_indices.resize_(nnz);
// Every reduced value lands in column 0 of the (nrows, 1) result.
new_col_indices.fill_(index_t(0));
new_values.resize_(nnz);
new_values_acc.resize_(nnz);
scalar_t* values_ptr = values.data_ptr<scalar_t>();
acc_t* new_values_acc_ptr = new_values_acc.data_ptr<acc_t>();
// Second pass (parallel over rows): reduce each non-empty row's values.
// Chunks are independent: each seeds i_end from crow_indices_ptr[irow_start]
// and writes only to the row_map slots of its own rows.
at::parallel_for(
0,
nrows,
internal::GRAIN_SIZE,
[&](int64_t irow_start, int64_t irow_end) {
index_t i_end = crow_indices_ptr[irow_start];
for (index_t h = irow_start; h < irow_end; ++h) {
index_t i_start = i_end;
i_end = crow_indices_ptr[h+1];
if (i_start != i_end) {
// Accumulate in acc_t (see acc_type note above) to avoid precision
// loss when scalar_t is a low-precision floating type.
acc_t res = static_cast<acc_t>(values_ptr[i_start]);
for (index_t i = i_start + 1; i < i_end; i++) {
res = rop(res, static_cast<acc_t>(values_ptr[i]));
}
new_values_acc_ptr[row_map_ptr[h]] = res;
}
}
});
});
// Copy/cast accumulated results back into the output-dtype buffer.
copy_from_acc_buffer(new_values, new_values_acc);
return at::native::_sparse_csr_tensor_unsafe(new_crow_indices, new_col_indices, new_values,
{sparse.size(0), 1},
new_values.scalar_type(),
sparse.layout(),
new_values.device());
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free