upsample_avx_bilinear_bicubic_uint8 Function — PyTorch Architecture
Architecture documentation for the upsample_avx_bilinear_bicubic_uint8 function template in UpSampleKernelAVXAntialias.h from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/UpSampleKernelAVXAntialias.h lines 303–431
// AVX-accelerated bilinear/bicubic resampling for 4D uint8 tensors laid out
// as (batch, channels, height, width). The result is written into the
// pre-allocated `output` tensor, whose sizes define the target resolution.
//
// Template parameters:
//   scale_type - indexable container of (optional) scales; indexed as
//                scales[interp_dim - 2], so scales[0] is the vertical
//                (height) scale and scales[1] the horizontal (width) scale.
//   F          - policy type providing compute_index_ranges_int16_weights(),
//                which returns per-output-pixel source index ranges together
//                with int16 fixed-point weights and their precision (the
//                bit shift used to renormalize accumulated products).
//
// Per batch element the pipeline is:
//   1. optionally unpack to a 4-channel working buffer (unpack_rgb),
//   2. horizontal resample (ImagingResampleHorizontal<3|4>),
//   3. vertical resample (ImagingResampleVertical),
//   4. optionally pack back to the original channel count (pack_rgb).
template <typename scale_type, class F>
void upsample_avx_bilinear_bicubic_uint8(
const at::Tensor& input_,
const at::Tensor& output,
bool align_corners,
const scale_type& scales,
bool antialias) {
auto batch_size = input_.size(0);
auto num_channels = input_.size(1);
auto xin = input_.size(3);
auto yin = input_.size(2);
auto xout = output.size(3);
auto yout = output.size(2);
// Identity resize: nothing to resample, just copy the data through.
if (xin == xout && yin == yout) {
output.copy_(input_);
return;
}
at::Tensor input = input_;
if (!(input.is_contiguous() || input.is_contiguous(at::MemoryFormat::ChannelsLast))) {
// If input is not contiguous with memory format channels first or channels last,
// we explicitly convert the input to contiguous channels last memory format.
// This simplifies the rest of the code and let us assume that the format is only contiguous channels first or channels last,
// Most tensors going through this `if` block won't need to go through unpacking, but those having C < 3 may
// have to (this means 2 copies are made). We could avoid the extra copy by handling non-contiguous input
// directly within unpack_rgb() and pack_rgb(), but initial attempts showed that this is fairly complex.
input = input.contiguous(at::MemoryFormat::ChannelsLast);
}
auto need_horizontal = xout != xin;
auto need_vertical = yout != yin;
// Set only inside the corresponding need_horizontal / need_vertical branches
// below, and read only when the matching resample pass actually runs.
int ksize_horiz, ksize_vert;
std::vector<at::Tensor> horiz_indices_weights, vert_indices_weights;
unsigned int horiz_weights_precision, vert_weights_precision;
// The vectorized kernels can consume the tensor memory directly only when it
// is channels-last with 3 or 4 channels (i.e. interleaved RGB/RGBA-like
// pixels). Otherwise the data is routed through a 4-channel unpack/pack step.
bool skip_unpacking = (num_channels == 3 || num_channels == 4) && input.is_contiguous(at::MemoryFormat::ChannelsLast);
bool skip_packing = (num_channels == 3 || num_channels == 4) && output.is_contiguous(at::MemoryFormat::ChannelsLast);
if (need_horizontal) {
int interp_dim = 3;
// Per-pixel stride in elements: the true channel count when working on the
// original tensor, otherwise 4 because unpack_rgb buffers hold 4 channels.
auto stride = skip_unpacking ? num_channels : 4;
std::tie(horiz_indices_weights, ksize_horiz, horiz_weights_precision) =
F::compute_index_ranges_int16_weights(
/*input_size=*/xin,
/*output_size=*/xout,
/*stride=*/stride,
/*ndims=*/4,
/*reshape_dim=*/interp_dim,
/*align_corners=*/align_corners,
/*opt_scale=*/scales[interp_dim - 2],
/*antialias=*/antialias,
/*align_i32=*/true);
}
if (need_vertical) {
int interp_dim = 2;
// Vertical stride spans a full row; xout (not xin) because the vertical
// pass runs after the horizontal one, i.e. on width-resampled rows.
auto stride = skip_unpacking ? num_channels * xout : 4 * xout;
std::tie(vert_indices_weights, ksize_vert, vert_weights_precision) =
F::compute_index_ranges_int16_weights(
/*input_size=*/yin,
/*output_size=*/yout,
/*stride=*/stride,
/*ndims=*/4,
/*reshape_dim=*/interp_dim,
/*align_corners=*/align_corners,
/*opt_scale=*/scales[interp_dim - 2],
/*antialias=*/antialias,
/*align_i32=*/true);
}
at::Tensor buffer_horiz, buffer_vert;
// Minor optimization: we can avoid allocating an extra buffer if we're performing
// horizontal-only or vertical-only interpolation, and if the tensor doesn't
// need repacking. Buffers are allocated once here and reused for every batch
// element inside the loop below.
if (need_horizontal && (need_vertical || !skip_packing)) {
auto c = skip_unpacking ? num_channels : 4;
buffer_horiz = at::empty({c, yin, xout}, input.options());
}
if (need_vertical && !skip_packing) {
auto c = skip_unpacking ? num_channels : 4;
buffer_vert = at::empty({c, yout, xout}, input.options());
}
for (const auto i : c10::irange(batch_size)) {
at::Tensor unpacked_input = skip_unpacking ? input[i] : unpack_rgb(input[i]);
at::Tensor unpacked_output;
if (need_horizontal) {
// Horizontal-only + packable output writes straight into output[i];
// otherwise an intermediate buffer receives the width-resampled image.
at::Tensor unpacked_output_temp = (need_vertical || !skip_packing) ? buffer_horiz : output[i];
// <3> vs <4> selects the per-pixel channel count the kernel assumes; the
// unpacked path always produces 4 channels, hence the else branch.
if (skip_unpacking && num_channels == 3) {
ImagingResampleHorizontal<3>(
unpacked_output_temp,
unpacked_input,
ksize_horiz,
horiz_indices_weights,
horiz_weights_precision);
} else {
ImagingResampleHorizontal<4>(
unpacked_output_temp,
unpacked_input,
ksize_horiz,
horiz_indices_weights,
horiz_weights_precision);
}
// Chain the passes: the horizontal result becomes both the tentative
// final output and the input of the vertical pass.
unpacked_output = unpacked_input = unpacked_output_temp;
}
if (need_vertical) {
unpacked_output = skip_packing ? output[i] : buffer_vert;
ImagingResampleVertical(
unpacked_output,
unpacked_input,
ksize_vert,
vert_indices_weights,
vert_weights_precision
);
}
// At least one pass must have run: the equality early-return above
// guarantees need_horizontal || need_vertical.
TORCH_INTERNAL_ASSERT(unpacked_output.defined());
if (!skip_packing) {
// Convert the 4-channel working buffer back to the output's channel
// count/layout in place inside output[i].
pack_rgb(unpacked_output, output[i]);
}
}
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free