upsample_bicubic2d_backward_out_frame Function — pytorch Architecture

Architecture documentation for the upsample_bicubic2d_backward_out_frame function template in UpSampleBicubic2d.cpp from the PyTorch codebase.

Class cpp

Entity Profile

Source Code

aten/src/ATen/native/UpSampleBicubic2d.cpp lines 107–178

template <typename scalar_t>
void upsample_bicubic2d_backward_out_frame(
    const scalar_t* odata,
    scalar_t* idata,
    int64_t input_height,
    int64_t input_width,
    int64_t output_height,
    int64_t output_width,
    int64_t nbatch,
    int64_t channels,
    bool align_corners,
    std::optional<double> scales_h,
    std::optional<double> scales_w) {
  channels = channels * nbatch;
  auto input_slice_size = input_height * input_width;
  auto output_slice_size = output_height * output_width;

  using opmath_t = at::opmath_type<scalar_t>;
  const opmath_t height_scale = area_pixel_compute_scale<opmath_t>(
      input_height, output_height, align_corners, scales_h);
  const opmath_t width_scale = area_pixel_compute_scale<opmath_t>(
      input_width, output_width, align_corners, scales_w);
  at::parallel_for(0, channels, at::internal::GRAIN_SIZE / output_slice_size / 4, [&](int64_t start, int64_t end) {
    opmath_t* acc_data_ptr = nullptr;
    std::unique_ptr<opmath_t[]> buffer_data;
    if constexpr (!std::is_same_v<scalar_t, opmath_t>) {
      buffer_data = std::make_unique<opmath_t[]>(input_slice_size);
      acc_data_ptr = buffer_data.get();
      memset(acc_data_ptr, 0, sizeof(opmath_t) * input_slice_size);
    }
    for (const auto i : c10::irange(start, end)) {
      scalar_t* in = idata + i * input_slice_size;
      const scalar_t* out = odata + i * output_slice_size;
      for (const auto output_y : c10::irange(output_height)) {
        for (const auto output_x : c10::irange(output_width)) {

          const opmath_t real_x = area_pixel_compute_source_index(width_scale, output_x, align_corners, /*cubic=*/true);
          int64_t input_x;
          opmath_t t_x;
          guard_index_and_lambda(real_x, input_width, input_x, t_x);

          const opmath_t real_y = area_pixel_compute_source_index(height_scale, output_y, align_corners, /*cubic=*/true);
          int64_t input_y;
          opmath_t t_y;
          guard_index_and_lambda(real_y, input_height, input_y, t_y);

          std::array<opmath_t, 4> x_coeffs;
          std::array<opmath_t, 4> y_coeffs;

          get_cubic_upsample_coefficients<opmath_t>(x_coeffs.data(), t_x);
          get_cubic_upsample_coefficients<opmath_t>(y_coeffs.data(), t_y);

          opmath_t out_value = out[output_y * output_width + output_x];
          for (const auto ii : c10::irange(4)) {
            for (const auto jj : c10::irange(4)) {
              upsample_increment_value_bounded<opmath_t>(
                  acc_data_ptr == nullptr ? reinterpret_cast<opmath_t*>(in) : acc_data_ptr,
                  input_width,
                  input_height,
                  input_x - 1 + ii,
                  input_y - 1 + jj,
                  out_value * y_coeffs[jj] * x_coeffs[ii]);
            }
          }
        }
      }
      if (acc_data_ptr != nullptr) {
        apply_grad_input(acc_data_ptr, in, input_slice_size);
      }
    }
  });
}

Source

View on GitHub

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free