Home / Class / can_use_native_serial_stack_impl Class — pytorch Architecture

can_use_native_serial_stack_impl Class — pytorch Architecture

Architecture documentation for the can_use_native_serial_stack_impl class in SerialStackImpl.h from the pytorch codebase.

Entity Profile

Source Code

aten/src/ATen/native/cpu/SerialStackImpl.h lines 68–118

// Decides whether the serial native stack kernel may be used to stack
// `tensors` along `dim` into `result`.
//
// Raises (via TORCH_CHECK) when `tensors` is empty, or when an input examined
// by the loop below has sizes different from the first input. Note that the
// loop returns false as soon as it meets an input that fails the layout/dtype
// requirements, so later inputs are not size-checked in that case.
//
// Returns true only if every requirement below holds and the workload is
// either smaller than at::internal::GRAIN_SIZE or only one thread is
// available; returns false otherwise.
template <typename TensorListType>
bool can_use_native_serial_stack_impl(Tensor& result, TensorListType tensors, int64_t dim) {
  TORCH_CHECK(!tensors.empty(), "expected a non-empty list of Tensors");
  const Tensor& head = tensors[0];

  // Only dim in [0, head.dim()) is accepted here. dim == head.dim() is still
  // a valid stack dimension, but it is left to the default (unsqueeze-based)
  // code path.
  if (dim >= head.dim()) {
    return false;
  }

  // An empty 1-D first tensor is not handled by the native path.
  if (head.dim() == 1 && head.numel() == 0) {
    return false;
  }

  // The output must already have the input dtype: no type promotion.
  if (result.dtype() != head.dtype()) {
    return false;
  }

  // The output has to be contiguous in the memory format suggested by the
  // first input.
  const auto mem_format = head.suggest_memory_format();
  if (!result.is_contiguous(mem_format)) {
    return false;
  }

  // The fast path is restricted to Float and Double.
  const ScalarType dtype = head.scalar_type();
  const bool is_supported_dtype =
      dtype == ScalarType::Float || dtype == ScalarType::Double;
  if (!is_supported_dtype) {
    return false;
  }

  // Validate the remaining inputs against the first one.
  const auto& expected_sizes = head.sizes();
  const auto& expected_strides = head.strides();
  for (const auto i : c10::irange(1, tensors.size())) {
    const auto& tensor = tensors[i];
    TORCH_CHECK(tensor.sizes() == expected_sizes,
      "stack expects each tensor to be equal size, but got ", expected_sizes,
      " at entry 0 and ", tensor.sizes(), " at entry ", i);

    // Each input must be contiguous in the chosen memory format ...
    if (!tensor.is_contiguous(mem_format)) {
      return false;
    }
    // ... share the first input's strides (sizes were checked above) ...
    if (tensor.strides() != expected_strides) {
      return false;
    }
    // ... and have the same dtype, so no type promotion is required.
    if (tensor.dtype() != dtype) {
      return false;
    }
  }

  // The serial kernel is only worthwhile when the work is too small to split
  // across threads, or when there is just one thread. The element count is
  // derived from the inputs rather than result.numel() because result may not
  // have been resized yet, and that cost is deferred until later.
  const int64_t total_elements = head.numel() * tensors.size();
  if (total_elements < at::internal::GRAIN_SIZE) {
    return true;
  }
  return at::get_num_threads() == 1;
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free