cpu_cum_base_kernel Function — pytorch Architecture
Architecture documentation for the cpu_cum_base_kernel function template in ReduceOpsKernel.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/ReduceOpsKernel.cpp lines 30–77
// Shared driver for CPU cumulative reductions (cumsum/cumprod-style ops).
// Resizes `result` to match `self`, then iterates over every 1-D lane
// along `dim`, invoking `f` once per lane with raw element pointers and
// the element strides along `dim`.
//
// Params:
//   result   - output tensor; resized to self.sizes() if it differs
//   self     - input tensor
//   dim      - dimension to accumulate along
//   f        - per-lane functor: f(out_ptr, out_stride, in_ptr, in_stride, init)
//   init_val - identity value for the accumulation
template <typename scalar_t, typename func_t>
inline void cpu_cum_base_kernel(const Tensor& result,
    const Tensor& self,
    int64_t dim,
    const func_t& f,
    scalar_t init_val) {
  // Ensure the output has the same shape as the input before any work.
  if (result.sizes() != self.sizes()) {
    at::native::resize_output(result, self.sizes());
  }

  // Empty input: nothing to accumulate.
  if (self.numel() == 0) {
    return;
  }

  // 0-dim tensor holds one element; the cumulative op is the identity on it.
  if (self.dim() == 0) {
    result.fill_(self);
    return;
  }

  // TODO This probably should be using at::native::make_reduction
  // Build an iterator over all dimensions except `dim` (squashed), so each
  // visit of the loop below corresponds to exactly one lane along `dim`.
  auto it = TensorIteratorConfig()
    .check_all_same_dtype(false)
    .resize_outputs(false)
    .declare_static_shape(self.sizes(), /*squash_dims=*/dim)
    .add_output(result)
    .add_const_input(self)
    .build();

  const auto out_dim_stride = ensure_nonempty_stride(result, dim);
  const auto in_dim_stride = ensure_nonempty_stride(self, dim);

  auto lane_loop = [&](char** data, const int64_t* strides, int64_t n) {
    auto* out_bytes = data[0];
    const auto* in_bytes = data[1];
    for (int64_t lane = 0; lane < n; ++lane) {
      // Hand `f` one full lane starting at these addresses. The const is
      // deliberately dropped on the input pointer to match the functor's
      // expected signature (the original code used a C-style cast here).
      f(reinterpret_cast<scalar_t*>(out_bytes),
        out_dim_stride,
        reinterpret_cast<scalar_t*>(const_cast<char*>(in_bytes)),
        in_dim_stride,
        init_val);
      out_bytes += strides[0];
      in_bytes += strides[1];
    }
  };

  // Scale grain size by lane length so each parallel chunk amounts to
  // roughly GRAIN_SIZE elements of real work.
  const int64_t grain_size =
      internal::GRAIN_SIZE / std::max(int64_t{1}, self.size(dim));
  it.for_each(lane_loop, grain_size);
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free