can_use_native_serial_stack_impl Class — pytorch Architecture
Architecture documentation for the can_use_native_serial_stack_impl class in SerialStackImpl.h from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/SerialStackImpl.h lines 68–118
// Decide whether the serial "native stack" fast path may be used to stack
// `tensors` along dimension `dim` into `result`.
//
// Returns true only when every precondition of the fast kernel holds:
// in-range dim, matching dtypes (no promotion), contiguous layout with
// identical strides across inputs, Float/Double element type, and a total
// workload small enough that multithreading would not pay off.
// Throws (via TORCH_CHECK) when the tensor list is empty or the inputs
// disagree on size — those are caller errors, not fast-path misses.
template <typename TensorListType>
bool can_use_native_serial_stack_impl(Tensor& result, TensorListType tensors, int64_t dim) {
  TORCH_CHECK(!tensors.empty(), "expected a non-empty list of Tensors");
  const Tensor& first = tensors[0];

  // The fast path only handles dim in [0, first.dim()). dim == first.dim()
  // is a valid stack input, but it is served by the default code path,
  // which uses unsqueeze.
  if (dim >= first.dim()) {
    return false;
  }

  // Empty 1-d tensors are skipped by stack, so the native path does not apply.
  if (first.dim() == 1 && first.numel() == 0) {
    return false;
  }

  // No type promotion: the result must already carry the inputs' dtype.
  if (result.dtype() != first.dtype()) {
    return false;
  }

  const auto mem_format = first.suggest_memory_format();
  const ScalarType stack_dtype = first.scalar_type();

  // The result buffer must be contiguous in the inputs' suggested format.
  if (!result.is_contiguous(mem_format)) {
    return false;
  }

  // The fast kernel is implemented only for Double and Float.
  if (stack_dtype != ScalarType::Double && stack_dtype != ScalarType::Float) {
    return false;
  }

  // Validate every remaining input against the first tensor.
  // first_tensor_shape feeds only the error message, so it is compiled out
  // when error messages are stripped.
#ifndef STRIP_ERROR_MESSAGES
  auto const& first_tensor_shape = first.sizes();
#endif
  for (const auto idx : c10::irange(1, tensors.size())) {
    auto const& t = tensors[idx];
    TORCH_CHECK(t.sizes() == first.sizes(),
        "stack expects each tensor to be equal size, but got ", first_tensor_shape,
        " at entry 0 and ", t.sizes(), " at entry ", idx);
    // Each input must be contiguous, share the first tensor's strides,
    // and carry the same dtype (no type promotion).
    if (!t.is_contiguous(mem_format) ||
        t.strides() != first.strides() ||
        t.dtype() != stack_dtype) {
      return false;
    }
  }

  // The serial fast path is only worthwhile when multithreading is not:
  // either the workload is below the grain size or only one thread exists.
  // result.numel() is deliberately not consulted here — result may not have
  // been resized yet, and that cost is deferred until later.
  const int64_t numel_in_stack = first.numel() * tensors.size();
  return numel_in_stack < at::internal::GRAIN_SIZE || at::get_num_threads() == 1;
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free