upsample_bilinear2d_out_frame Function — pytorch Architecture
Architecture documentation for the upsample_bilinear2d_out_frame function template in UpSampleBilinear2d.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/quantized/cpu/UpSampleBilinear2d.cpp lines 38–140
// Bilinear 2-D upsampling kernel for quantized CPU tensors (one frame each
// for input and output, NCHW layout flattened to [nbatch*channels, H, W]).
//
// scalar_t is expected to be a quantized scalar type exposing a raw integer
// representation via scalar_t::underlying (the code reinterprets the data
// pointers to that type and requantizes via at::native::quantize_val).
//
// Interpolation is performed directly on the raw integer values in the
// *input* quantization domain; the input zero point is subtracted once
// (valid because the four bilinear weights sum to 1), and the result is
// requantized into the output's (scale, zero_point) domain.
//
// Args:
//   output/input    quantized tensors; output is written in place
//   input_height/width, output_height/width  spatial dims
//   nbatch, channels  batch and channel counts; folded together below
//   align_corners   corner-alignment convention forwarded to the
//                   area_pixel_* index/scale helpers
//   scales_h/scales_w  optional user-provided scale overrides, forwarded
//                   to area_pixel_compute_scale
template <typename scalar_t>
void upsample_bilinear2d_out_frame(
Tensor& output,
const Tensor& input,
int64_t input_height,
int64_t input_width,
int64_t output_height,
int64_t output_width,
int64_t nbatch,
int64_t channels,
bool align_corners,
std::optional<double> scales_h,
std::optional<double> scales_w) {
auto* idata = static_cast<const scalar_t*>(input.const_data_ptr());
auto* odata = static_cast<scalar_t*>(output.data_ptr());
// Batch and channel dims are interchangeable for this kernel: treat the
// data as (nbatch*channels) independent 2-D planes.
channels = channels * nbatch;
// Nothing to write for an empty output.
if (channels == 0 || output_height == 0 || output_width == 0) {
return;
}
// Work on the raw integer representation of the quantized values.
auto* i_p = reinterpret_cast<const typename scalar_t::underlying*>(idata);
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(odata);
// special case: just copy
// NOTE(review): this raw copy is only correct if output shares the input's
// quantization params (scale / zero_point) — presumably guaranteed by the
// caller that allocates `output`; confirm at the call site.
if (input_height == output_height && input_width == output_width) {
std::memcpy(
o_p,
i_p,
channels * input_height * input_width *
sizeof(typename scalar_t::underlying));
return;
}
// Ratio of input to output extent along each axis (the "area pixel" scale
// used to map an output coordinate back to a source coordinate).
const auto rheight = area_pixel_compute_scale<float>(
input_height, output_height, align_corners, scales_h);
const auto rwidth = area_pixel_compute_scale<float>(
input_width, output_width, align_corners, scales_w);
// Requantization factor: quantize_val(output_scale, zp, v) computes
// zp + round(v / output_scale), and v is in input quantization units, so
// dividing by out_q_scale/in_q_scale converts to output units.
float output_scale = static_cast<float>(output.q_scale() / input.q_scale());
const int64_t input_q_zero_point = input.q_zero_point();
const int64_t output_q_zero_point = output.q_zero_point();
// Precompute, per output column: left source column w1, offset to the
// right neighbor w1p (0 when clamped at the right edge, so pos1[w1p]
// safely re-reads the same pixel), and the two horizontal weights.
std::vector<UpsampleBilinearParamW> params_w;
params_w.reserve(output_width);
for (const auto w2 : c10::irange(output_width)) {
const auto w1r = area_pixel_compute_source_index<float>(
rwidth, w2, align_corners, /*cubic=*/false);
const int64_t w1 = w1r;
const int64_t w1p = (w1 < input_width - 1) ? 1 : 0;
const float w1lambda = w1r - w1;
const float w0lambda = static_cast<float>(1.) - w1lambda;
params_w.emplace_back(w1, w1p, w0lambda, w1lambda);
}
// compared to 'nearest', each requires 4 points and takes additional * and +
// set the scale to be 16.
int64_t grain_size = internal::GRAIN_SIZE / std::max(int64_t{1}, output_width) / 16;
// Parallelize over the flattened (plane, output-row) index space; each
// iteration i produces one full output row.
at::parallel_for(0, channels * output_height, grain_size, [&](int64_t begin, int64_t end) {
// Decompose the flat start index into plane index nc and output row h2.
int64_t nc{0}, h2{0};
data_index_init(begin, nc, channels, h2, output_height);
for (const auto i : c10::irange(begin, end)) {
// Vertical source position for this output row: top row h1, offset to
// the bottom neighbor h1p (0 at the bottom edge), and the two weights.
const auto h1r = area_pixel_compute_source_index<float>(
rheight, h2, align_corners, /*cubic=*/false);
const int64_t h1 = h1r;
const int64_t h1p = (h1 < input_height - 1) ? 1 : 0;
const float h1lambda = h1r - h1;
const float h0lambda = static_cast<float>(1.) - h1lambda;
// Base of the current input plane and of the current output row.
const auto* i_ptr = &i_p[nc * input_height * input_width];
auto* pos2 = &o_p[i * output_width];
for (const auto w2 : c10::irange(output_width)) {
const auto& param_w = params_w[w2];
const int64_t w1 = param_w.w1;
const int64_t w1p = param_w.w1p;
const float w0lambda = param_w.w0lambda;
const float w1lambda = param_w.w1lambda;
// Top-left of the 2x2 neighborhood being interpolated.
const auto* pos1 = i_ptr + h1 * input_width + w1;
// Weighted sum of the 4 neighbors in raw integer units; the input
// zero point is subtracted once since the weights sum to 1.
const float result = h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda *
(w0lambda * pos1[h1p * input_width] +
w1lambda * pos1[h1p * input_width + w1p]) - input_q_zero_point;
// requantization
pos2[w2] = at::native::quantize_val<scalar_t>(
output_scale, output_q_zero_point, result)
.val_;
}
// Advance (nc, h2) to the next flattened index.
data_index_step(nc, channels, h2, output_height);
}
});
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free