ImagingResampleHorizontal<num_channels> Function Template — pytorch Architecture
Architecture documentation for the ImagingResampleHorizontal function template (parameterized on the integer num_channels) in UpSampleKernelAVXAntialias.h from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/UpSampleKernelAVXAntialias.h lines 156–233
// Horizontal resampling pass: produces x-axis (image width) interpolation
// outputs, one output row per input row.
//
// Template parameter:
//   num_channels            - channel count; asserted to equal unpacked_input.size(0).
// Parameters:
//   unpacked_output         - destination tensor, written through a uint8_t pointer;
//                             size(1) is used as the row count, size(2) as the row width.
//   unpacked_input          - source tensor, read through a const uint8_t pointer;
//                             size(2) is used as the row width.
//   ksize                   - forwarded unchanged to the convolution kernels
//                             (K in the formulas below).
//   horiz_indices_weights   - element [0] supplies the per-output-pixel xmin indices,
//                             element [1] the per-output-pixel sizes, and element [3]
//                             the weights buffer (read as 16-bit integers).
//   horiz_weights_precision - forwarded unchanged to the convolution kernels.
template<int num_channels>
void ImagingResampleHorizontal(
    const at::Tensor & unpacked_output,
    const at::Tensor & unpacked_input,
    int ksize,
    const std::vector<at::Tensor>& horiz_indices_weights,
    unsigned int horiz_weights_precision) {
  // Data layout.
  // Input pixels are interleaved per channel:
  //   input = [r[0], g[0], b[0], a[0], r[1], g[1], b[1], a[1], r[2], g[2], b[2], a[2], ...]
  // Weights are float values computed for each output pixel and rescaled to
  // 16-bit integers (they are read below through an int16_t pointer):
  //   weights[i] = [w[i, 0], w[i, 1], ..., w[i, K-1]]
  // The output uses the same interleaving as the input:
  //   output = [oR[0], oG[0], oB[0], oA[0], oR[1], oG[1], oB[1], oA[1], ...]
  // with
  //   oR[yoffset + i] = r[yoffset + xmin[i]] * w[i, 0] + ... + r[yoffset + xmin[i] + K-1] * w[i, K-1]
  //   oG[yoffset + i] = g[yoffset + xmin[i]] * w[i, 0] + ... + g[yoffset + xmin[i] + K-1] * w[i, K-1]
  //   oB[yoffset + i] = b[yoffset + xmin[i]] * w[i, 0] + ... + b[yoffset + xmin[i] + K-1] * w[i, K-1]
  //
  // TODO: we may want to merge that into the fallback code (currently called
  // basic_loop_aa_horizontal<uint8_t>)
  // Although this may not be needed if / when we port all this code to use
  // Vec.h since this would potentially give us another fall-back implem

  // The weights tensor is accessed as double, but its bytes are consumed as
  // int16_t values by the kernels.
  const int16_t* weights = reinterpret_cast<const int16_t*>(
      horiz_indices_weights[3].const_data_ptr<double>());

  const auto out_width = unpacked_output.size(2);
  const auto out_height = unpacked_output.size(1);
  const auto in_width = unpacked_input.size(2);
  TORCH_INTERNAL_ASSERT(num_channels == unpacked_input.size(0));

  const int64_t* xmin_ptr = horiz_indices_weights[0].const_data_ptr<int64_t>();
  const int64_t* xsize_ptr = horiz_indices_weights[1].const_data_ptr<int64_t>();

  uint8_t* out_base = unpacked_output.data_ptr<uint8_t>();
  const uint8_t* in_base = unpacked_input.const_data_ptr<uint8_t>();

  // Number of uint8 elements in one row: width times channel count.
  const auto out_row_len = out_width * num_channels;
  const auto in_row_len = in_width * num_channels;

  // Start-of-row pointers for a given row index.
  const auto out_row = [&](int64_t r) { return out_base + r * out_row_len; };
  const auto in_row = [&](int64_t r) { return in_base + r * in_row_len; };

  int64_t row = 0;

  // Main loop: hand four consecutive rows at a time to the 4x kernel.
  while (row + 3 < out_height) {
    // True when the fourth row of this group is the final output row.
    const bool group_ends_on_last_row = (row + 4 == out_height);
    ImagingResampleHorizontalConvolution8u4x(
        out_row(row),
        out_row(row + 1),
        out_row(row + 2),
        out_row(row + 3),
        out_width,
        in_row(row),
        in_row(row + 1),
        in_row(row + 2),
        in_row(row + 3),
        in_width,
        xmin_ptr,
        xsize_ptr,
        weights,
        ksize,
        horiz_weights_precision,
        num_channels,
        group_ends_on_last_row);
    row += 4;
  }

  // Tail loop: the remaining (at most three) rows go through the single-row kernel.
  while (row < out_height) {
    // True when this is the final output row.
    const bool is_last_row = (row + 1 == out_height);
    ImagingResampleHorizontalConvolution8u(
        out_row(row),
        out_width,
        in_row(row),
        in_width,
        xmin_ptr,
        xsize_ptr,
        weights,
        ksize,
        horiz_weights_precision,
        num_channels,
        is_last_row);
    ++row;
  }
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free