Home / Class / nrows Class — pytorch Architecture

nrows Class — pytorch Architecture

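Architecture documentation for the nrows entry in SumKernel.cpp from the PyTorch codebase. Note that in the source shown below, `nrows` is a non-type template parameter of the `multi_row_sum` function template rather than a class.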

Entity Profile

Source Code

aten/src/ATen/native/cpu/SumKernel.cpp lines 345–410

template <typename scalar_t, int64_t nrows, typename LoadPolicy>
std::array<scalar_t, nrows> multi_row_sum(
    const char * C10_RESTRICT in_data,
    const int64_t row_stride,
    const int64_t col_stride,
    const int64_t size) {
  constexpr int64_t num_levels = 4;

  const int64_t level_power =
      std::max(int64_t(4), utils::CeilLog2(size) / num_levels);
  const int64_t level_step = (1 << level_power);
  const int64_t level_mask = level_step - 1;

  std::array<std::array<scalar_t, nrows>, num_levels> acc{};
  for (auto &row:acc) {
    row.fill(scalar_t(0));
  }

  int64_t i = 0;
  for (; i + level_step <= size;) {
    for (int64_t j = 0; j < level_step; ++j, ++i) {
      const char * sum_base = in_data + i * row_stride;
      #if !defined(COMPILING_FOR_MIN_SIZE)
      # pragma unroll
      #endif
      for (const auto k : c10::irange(nrows)) {
        acc[0][k] += LoadPolicy::load(sum_base, col_stride, k);
      }
    }

    for (const auto j : c10::irange(1, num_levels)) {
      #if !defined(COMPILING_FOR_MIN_SIZE)
      # pragma unroll
      #endif
      for (const auto k : c10::irange(nrows)) {
        acc[j][k] += acc[j-1][k];
        acc[j-1][k] = scalar_t(0);
      }

      const auto mask = (level_mask << (j * level_power));
      if ((i & mask) != 0) {
        break;
      }
    }
  }

  for (; i < size; ++i) {
    const char * sum_base = in_data + i * row_stride;
    #if !defined(COMPILING_FOR_MIN_SIZE)
    # pragma unroll
    #endif
    for (const auto k : c10::irange(nrows)) {
      acc[0][k] += LoadPolicy::load(sum_base, col_stride, k);
    }
  }

  for (const auto j : c10::irange(1, num_levels)) {
    #if !defined(COMPILING_FOR_MIN_SIZE)
    # pragma unroll
    #endif
    for (const auto k : c10::irange(nrows)) {
      acc[0][k] += acc[j][k];
    }
  }
  return acc[0];
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free