Home / Class/ upsample_avx_bilinear_bicubic_uint8 Class — pytorch Architecture

upsample_avx_bilinear_bicubic_uint8 Class — pytorch Architecture

Architecture documentation for the upsample_avx_bilinear_bicubic_uint8 function template in UpSampleKernelAVXAntialias.h from the PyTorch codebase.

Entity Profile

Source Code

aten/src/ATen/native/cpu/UpSampleKernelAVXAntialias.h lines 303–431

// Resamples `input_` into `output` along dims 2 and 3, one batch element at a
// time, using a horizontal pass (dim 3), a vertical pass (dim 2), or both.
//
// Template parameters:
//   scale_type - indexable container of scales; element [0] is forwarded for
//                dim 2 and element [1] for dim 3.
//   F          - type providing the static compute_index_ranges_int16_weights(),
//                whose result is unpacked here into (index/weight tensors,
//                kernel size, weights precision).
//
// Parameters:
//   input_        - source tensor; its sizes are read at dims 0..3.
//   output        - destination tensor, written in place; its sizes at dims 2
//                   and 3 define the target extent.
//   align_corners - forwarded unchanged to F::compute_index_ranges_int16_weights().
//   scales        - per-dimension scales, see scale_type above.
//   antialias     - forwarded unchanged to F::compute_index_ranges_int16_weights().
//
// When the sizes at dims 2 and 3 already match, the input is copied into the
// output and nothing else is done.
template <typename scale_type, class F>
void upsample_avx_bilinear_bicubic_uint8(
    const at::Tensor& input_,
    const at::Tensor& output,
    bool align_corners,
    const scale_type& scales,
    bool antialias) {
  // Dimensions that get interpolated.
  const int height_dim = 2;
  const int width_dim = 3;

  const auto batch_size = input_.size(0);
  const auto num_channels = input_.size(1);
  const auto yin = input_.size(height_dim);
  const auto xin = input_.size(width_dim);
  const auto yout = output.size(height_dim);
  const auto xout = output.size(width_dim);

  const bool need_horizontal = xout != xin;
  const bool need_vertical = yout != yin;

  // Nothing to resample: plain copy.
  if (!need_horizontal && !need_vertical) {
    output.copy_(input_);
    return;
  }

  // The rest of the function only deals with two layouts: contiguous channels
  // first and contiguous channels last. Anything else is converted to
  // contiguous channels last up front. Most tensors converted here will not
  // need unpacking afterwards, but those with C < 3 may, which means two
  // copies are made. Handling non-contiguous input directly inside
  // unpack_rgb() / pack_rgb() would avoid the extra copy, but initial attempts
  // showed that to be fairly complex.
  at::Tensor input = input_;
  if (!input.is_contiguous() &&
      !input.is_contiguous(at::MemoryFormat::ChannelsLast)) {
    input = input.contiguous(at::MemoryFormat::ChannelsLast);
  }

  // Unpacking (resp. packing) is skipped for 3- or 4-channel tensors whose
  // input (resp. output) is contiguous in channels last format.
  const bool three_or_four_channels = num_channels == 3 || num_channels == 4;
  const bool skip_unpacking = three_or_four_channels &&
      input.is_contiguous(at::MemoryFormat::ChannelsLast);
  const bool skip_packing = three_or_four_channels &&
      output.is_contiguous(at::MemoryFormat::ChannelsLast);

  // Channel count of the data the resampling passes operate on: the tensor's
  // own channel count when unpacking is skipped, 4 otherwise.
  const auto work_channels = skip_unpacking ? num_channels : 4;

  // Each of these is assigned and read only when the matching pass is needed.
  int horiz_ksize;
  int vert_ksize;
  std::vector<at::Tensor> horiz_idx_weights;
  std::vector<at::Tensor> vert_idx_weights;
  unsigned int horiz_precision;
  unsigned int vert_precision;

  if (need_horizontal) {
    const auto horiz_stride = work_channels;
    std::tie(horiz_idx_weights, horiz_ksize, horiz_precision) =
        F::compute_index_ranges_int16_weights(
            /*input_size=*/xin,
            /*output_size=*/xout,
            /*stride=*/horiz_stride,
            /*ndims=*/4,
            /*reshape_dim=*/width_dim,
            /*align_corners=*/align_corners,
            /*opt_scale=*/scales[width_dim - 2],
            /*antialias=*/antialias,
            /*align_i32=*/true);
  }

  if (need_vertical) {
    const auto vert_stride = work_channels * xout;
    std::tie(vert_idx_weights, vert_ksize, vert_precision) =
        F::compute_index_ranges_int16_weights(
            /*input_size=*/yin,
            /*output_size=*/yout,
            /*stride=*/vert_stride,
            /*ndims=*/4,
            /*reshape_dim=*/height_dim,
            /*align_corners=*/align_corners,
            /*opt_scale=*/scales[height_dim - 2],
            /*antialias=*/antialias,
            /*align_i32=*/true);
  }

  // The horizontal pass can write straight into the output slice only when it
  // is the sole pass and no repacking is required; otherwise it needs an
  // intermediate buffer. Likewise the vertical pass needs its own buffer only
  // when the result still has to be repacked. Buffers are allocated once and
  // reused for every batch element.
  const bool horiz_writes_to_buffer = need_vertical || !skip_packing;

  at::Tensor buffer_horiz;
  at::Tensor buffer_vert;
  if (need_horizontal && horiz_writes_to_buffer) {
    buffer_horiz = at::empty({work_channels, yin, xout}, input.options());
  }
  if (need_vertical && !skip_packing) {
    buffer_vert = at::empty({work_channels, yout, xout}, input.options());
  }

  for (const auto batch_idx : c10::irange(batch_size)) {
    // Source of the next pass; starts as the (possibly unpacked) input slice.
    at::Tensor pass_input;
    if (skip_unpacking) {
      pass_input = input[batch_idx];
    } else {
      pass_input = unpack_rgb(input[batch_idx]);
    }

    // Destination of the last pass that ran; stays undefined until one runs.
    at::Tensor pass_output;

    if (need_horizontal) {
      at::Tensor horiz_dst;
      if (horiz_writes_to_buffer) {
        horiz_dst = buffer_horiz;
      } else {
        horiz_dst = output[batch_idx];
      }

      // The 3-channel specialization is used only for data that was not
      // unpacked; every other case goes through the 4-channel one.
      if (skip_unpacking && num_channels == 3) {
        ImagingResampleHorizontal<3>(
            horiz_dst,
            pass_input,
            horiz_ksize,
            horiz_idx_weights,
            horiz_precision);
      } else {
        ImagingResampleHorizontal<4>(
            horiz_dst,
            pass_input,
            horiz_ksize,
            horiz_idx_weights,
            horiz_precision);
      }

      // The horizontal result is both the current output and the input of a
      // vertical pass, if one follows.
      pass_input = horiz_dst;
      pass_output = horiz_dst;
    }

    if (need_vertical) {
      if (skip_packing) {
        pass_output = output[batch_idx];
      } else {
        pass_output = buffer_vert;
      }

      ImagingResampleVertical(
          pass_output,
          pass_input,
          vert_ksize,
          vert_idx_weights,
          vert_precision);
    }

    // At least one pass must have run, since the no-op case returned early.
    TORCH_INTERNAL_ASSERT(pass_output.defined());

    if (!skip_packing) {
      pack_rgb(pass_output, output[batch_idx]);
    }
  }
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free