Home / Class/ reduce_sparse_csr_dim1_cpu_template Class — pytorch Architecture

reduce_sparse_csr_dim1_cpu_template Class — pytorch Architecture

Architecture documentation for the reduce_sparse_csr_dim1_cpu_template class in SparseCsrTensorMath.cpp from the pytorch codebase.

Entity Profile

Source Code

aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp lines 1122–1236

template <typename scalar_t, typename ReductionOp>
Tensor reduce_sparse_csr_dim1_cpu_template(const Tensor& sparse, ReductionOp rop) {
  /*
    Consider the following sparse tensor:

    1 * * * *
    * * * 2 *
    * * 3 * *
    * * * * *
    4 * 5 * *

    that has CSR representation

      crow_indices = [0, 1, 2, 3, 3, 5]
      col_indices = [0, 3, 2, 0, 2]
      values = [1, 2, 3, 4, 5]

    Reduction with dim=1 results:

    1
    2
    3
    *
    rop(4, 5)

    that has CSR representation

      new_crow_indices = [0, 1, 2, 3, 3, 4]
      new_col_indices = [0, 0, 0, 0]
      new_values = [1, 2, 3, rop(4, 5)]

    In general, the result CSR data can be computed as follows:

      new_crow_indices = [0]
      for i in range(1, nrows+1):
          new_crow_indices[i] = new_crow_indices[i-1] + (crow_indices[i] == crow_indices[i-1])
      nnz = new_crow_indices[-1]
      new_col_indices = zeros(nnz)
      new_values.resize(nnz)
      j = -1
      for i in range(1, nrows+1):
          if crow_indices[i] == crow_indices[i-1]:
              continue
          j += 1
          new_values[j] = rop(values[crow_indices[i] : crow_indices[i-1]])
  */

  Tensor crow_indices = sparse.crow_indices();
  auto ioptions = crow_indices.options();
  Tensor values = sparse.values();
  auto nrows = sparse.size(0);

  Tensor new_crow_indices = at::empty({crow_indices.numel()}, ioptions);
  Tensor new_col_indices = at::empty({}, ioptions);
  Tensor row_map = at::empty({nrows}, ioptions);

  // Set `is_cuda` = `true` in acc_type in CPU backend. Because the accumulate type
  // of float should be float in current scenario. In CUDA, float is the accumulate type
  // of float, while in CPU, double is the accumulate type of float.
  using acc_t = at::acc_type<scalar_t, true>;
  auto acc_buffer = at::sparse_csr::create_acc_buffer<acc_t, scalar_t>(
      values.options(), values.scalar_type());
  Tensor new_values = std::get<0>(acc_buffer);
  Tensor new_values_acc = std::get<1>(acc_buffer);

  AT_DISPATCH_INDEX_TYPES(crow_indices.scalar_type(), "reduce_sparse_csr_dim1_cpu_indices",
                          [&]() {
    index_t* crow_indices_ptr = crow_indices.data_ptr<index_t>();
    index_t* new_crow_indices_ptr = new_crow_indices.data_ptr<index_t>();
    index_t* row_map_ptr = row_map.data_ptr<index_t>();
    int64_t nnz = 0;
    new_crow_indices_ptr[0] = 0;
    for(int64_t i=0; i<nrows; i++) {
      if (crow_indices_ptr[i] != crow_indices_ptr[i + 1]) {
        row_map_ptr[i] = nnz;
        nnz++;
      }
      new_crow_indices_ptr[i + 1] = nnz;
    }
    new_col_indices.resize_(nnz);
    new_col_indices.fill_(index_t(0));
    new_values.resize_(nnz);
    new_values_acc.resize_(nnz);

    scalar_t* values_ptr = values.data_ptr<scalar_t>();
    acc_t* new_values_acc_ptr = new_values_acc.data_ptr<acc_t>();

    at::parallel_for(
        0,
        nrows,
        internal::GRAIN_SIZE,
        [&](int64_t irow_start, int64_t irow_end) {
            index_t i_end = crow_indices_ptr[irow_start];
            for (index_t h = irow_start; h < irow_end; ++h) {
              index_t i_start = i_end;
              i_end = crow_indices_ptr[h+1];
              if (i_start != i_end) {
                acc_t res = static_cast<acc_t>(values_ptr[i_start]);
                for (index_t i = i_start + 1; i < i_end; i++) {
                  res = rop(res, static_cast<acc_t>(values_ptr[i]));
                }
                new_values_acc_ptr[row_map_ptr[h]] = res;
              }
            }
        });
                          });

  copy_from_acc_buffer(new_values, new_values_acc);

  return at::native::_sparse_csr_tensor_unsafe(new_crow_indices, new_col_indices, new_values,
                                                {sparse.size(0), 1},
                                                new_values.scalar_type(),
                                                sparse.layout(),
                                                new_values.device());
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free