stack_serial_kernel_impl Class — pytorch Architecture
Architecture documentation for the stack_serial_kernel_impl class in SerialStackImpl.h from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/SerialStackImpl.h lines 28–61
template <typename scalar_t, typename TensorListType>
void stack_serial_kernel_impl(Tensor& result, TensorListType tensors, int64_t dim) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
dim >= 0 && dim <= result.dim(),
"dim out of range in stack_serial_kernel_impl");
int64_t outer =
result.numel() / (result.sizes()[dim] * result.strides()[dim]);
scalar_t* result_data = result.data_ptr<scalar_t>();
int64_t ninputs = tensors.size();
std::vector<InputMeta> inputs;
inputs.reserve(ninputs);
for (const auto& tensor : tensors) {
inputs.emplace_back(tensor, dim, tensor.strides()[dim]);
}
using Vec = vec::Vectorized<scalar_t>;
scalar_t* result_ptr = result_data;
for (const auto i : c10::irange(outer)) {
for (const auto j : c10::irange(ninputs)) {
int64_t local_inner = inputs[j].inner_size;
scalar_t* input_ptr = (scalar_t*)(inputs[j].data_ptr) + i * local_inner;
if (local_inner < Vec::size()) {
for (const auto k : c10::irange(local_inner)) {
result_ptr[k] = input_ptr[k];
}
} else {
vec::map(
[](Vec x) { return x; }, result_ptr, input_ptr, local_inner);
}
result_ptr += local_inner;
}
}
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free