nrows Class — pytorch Architecture
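Architecture documentation for the `multi_row_sum` function template in SumKernel.cpp from the pytorch codebase. Note that `nrows` is that template's non-type (`int64_t`) parameter, not a class.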
Entity Profile
Source Code
aten/src/ATen/native/cpu/SumKernel.cpp lines 345–410
template <typename scalar_t, int64_t nrows, typename LoadPolicy>
std::array<scalar_t, nrows> multi_row_sum(
const char * C10_RESTRICT in_data,
const int64_t row_stride,
const int64_t col_stride,
const int64_t size) {
constexpr int64_t num_levels = 4;
const int64_t level_power =
std::max(int64_t(4), utils::CeilLog2(size) / num_levels);
const int64_t level_step = (1 << level_power);
const int64_t level_mask = level_step - 1;
std::array<std::array<scalar_t, nrows>, num_levels> acc{};
for (auto &row:acc) {
row.fill(scalar_t(0));
}
int64_t i = 0;
for (; i + level_step <= size;) {
for (int64_t j = 0; j < level_step; ++j, ++i) {
const char * sum_base = in_data + i * row_stride;
#if !defined(COMPILING_FOR_MIN_SIZE)
# pragma unroll
#endif
for (const auto k : c10::irange(nrows)) {
acc[0][k] += LoadPolicy::load(sum_base, col_stride, k);
}
}
for (const auto j : c10::irange(1, num_levels)) {
#if !defined(COMPILING_FOR_MIN_SIZE)
# pragma unroll
#endif
for (const auto k : c10::irange(nrows)) {
acc[j][k] += acc[j-1][k];
acc[j-1][k] = scalar_t(0);
}
const auto mask = (level_mask << (j * level_power));
if ((i & mask) != 0) {
break;
}
}
}
for (; i < size; ++i) {
const char * sum_base = in_data + i * row_stride;
#if !defined(COMPILING_FOR_MIN_SIZE)
# pragma unroll
#endif
for (const auto k : c10::irange(nrows)) {
acc[0][k] += LoadPolicy::load(sum_base, col_stride, k);
}
}
for (const auto j : c10::irange(1, num_levels)) {
#if !defined(COMPILING_FOR_MIN_SIZE)
# pragma unroll
#endif
for (const auto k : c10::irange(nrows)) {
acc[0][k] += acc[j][k];
}
}
return acc[0];
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free