cpu_padding_backward_channels_last Function — pytorch Architecture
Architecture documentation for the cpu_padding_backward_channels_last function template in PaddingKernel.cpp from the pytorch codebase.
Entity Profile
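cpu_padding_backward_channels_last is a templated CPU kernel, parameterized over the element type scalar_t and a PaddingType policy, that computes the backward pass of padding operators for channels-last (NHWC or NDHWC) tensors. Each grad_output position is mapped back to the grad_input position it was padded from via PaddingType::index, and the gradient is accumulated across the contiguous channel dimension with add_stub, with work parallelized over the batch dimension. If the caller's grad_input_ is not already channels-last contiguous, the result is computed into a contiguous buffer and copied back at the end.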
Source Code
aten/src/ATen/native/cpu/PaddingKernel.cpp lines 395–474
template <typename scalar_t, typename PaddingType>
void cpu_padding_backward_channels_last(
    const Tensor& grad_input_,
    const Tensor& grad_output_,
    PaddingParams& p) {
  auto memory_format = p.ndim == 2
      ? at::MemoryFormat::ChannelsLast
      : at::MemoryFormat::ChannelsLast3d;

  auto grad_input = grad_input_.contiguous(memory_format);
  auto grad_output = grad_output_.contiguous(memory_format);

  auto grad_input_data = grad_input.data_ptr<scalar_t>();
  auto grad_output_data = grad_output.const_data_ptr<scalar_t>();

  int64_t nbatch = p.nbatch;
  int64_t channels = p.channels;

  int ndim = p.ndim;
  int64_t input_depth = ndim == 3 ? p.ishape[ndim - 3] : 1;
  int64_t input_height = ndim >= 2 ? p.ishape[ndim - 2] : 1;
  int64_t input_width = p.ishape[ndim - 1];
  int64_t output_depth = ndim == 3 ? p.oshape[ndim - 3] : 1;
  int64_t output_height = ndim >= 2 ? p.oshape[ndim - 2] : 1;
  int64_t output_width = p.oshape[ndim - 1];
  int64_t pad_d = ndim == 3 ? p.pads[ndim - 3] : 0;
  int64_t pad_h = ndim >= 2 ? p.pads[ndim - 2] : 0;
  int64_t pad_w = p.pads[ndim - 1];
  int64_t offset_d = ndim == 3 ? p.offsets[ndim - 3] : 0;
  int64_t offset_h = ndim >= 2 ? p.offsets[ndim - 2] : 0;
  int64_t offset_w = p.offsets[ndim - 1];

  if (ndim == 2) {
    // parallel on N, sequential on H,W, vectorize on C
    at::parallel_for(0, nbatch, 1, [&](int64_t begin, int64_t end) {
      for (const auto n : c10::irange(begin, end)) {
        for (const auto oh : c10::irange(output_height)) {
          int64_t ih = PaddingType::index(oh, input_height, pad_h, offset_h);
          for (const auto ow : c10::irange(output_width)) {
            int64_t iw = PaddingType::index(ow, input_width, pad_w, offset_w);
            scalar_t* grad_input_ptr = grad_input_data +
                (n * input_height * input_width + ih * input_width + iw) * channels;
            const scalar_t* grad_output_ptr = grad_output_data +
                (n * output_height * output_width + oh * output_width + ow) * channels;
            add_stub(grad_input_ptr, grad_output_ptr, channels);
          }
        }
      }
    });
  } else if (ndim == 3) {
    // parallel on N, sequential on D,H,W, vectorize on C
    at::parallel_for(0, nbatch, 1, [&](int64_t begin, int64_t end) {
      for (const auto n : c10::irange(begin, end)) {
        for (const auto od : c10::irange(output_depth)) {
          int64_t id = PaddingType::index(od, input_depth, pad_d, offset_d);
          for (const auto oh : c10::irange(output_height)) {
            int64_t ih = PaddingType::index(oh, input_height, pad_h, offset_h);
            for (const auto ow : c10::irange(output_width)) {
              int64_t iw = PaddingType::index(ow, input_width, pad_w, offset_w);
              scalar_t* grad_input_ptr = grad_input_data +
                  (n * input_depth * input_height * input_width + id * input_height * input_width +
                   ih * input_width + iw) * channels;
              const scalar_t* grad_output_ptr = grad_output_data +
                  (n * output_depth * output_height * output_width + od * output_height * output_width +
                   oh * output_width + ow) * channels;
              add_stub(grad_input_ptr, grad_output_ptr, channels);
            }
          }
        }
      }
    });
  } else {
    TORCH_INTERNAL_ASSERT(false, "expect input dim to be 2d or 3d.");
  }

  if (!grad_input_.is_contiguous(memory_format)) {
    grad_input_.copy_(grad_input);
  }
}
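For intuition, below is a minimal, self-contained sketch of the same accumulation pattern, single-threaded and 2D only. It is illustrative rather than the PyTorch implementation: replicate_index is a hypothetical stand-in for PaddingType::index (using replication-style clamping), and the plain channel loop stands in for the vectorized add_stub.

// Sketch only: single-threaded analogue of the 2D channels-last loop above.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical index mapping: clamp an output coordinate back into the input
// range, the way replication padding would.
int64_t replicate_index(int64_t o, int64_t size, int64_t pad) {
  return std::min(std::max(o - pad, int64_t{0}), size - 1);
}

int main() {
  const int64_t C = 2, IH = 2, IW = 2, pad_h = 1, pad_w = 1;
  const int64_t OH = IH + 2 * pad_h, OW = IW + 2 * pad_w;

  // grad_output in HWC order (nbatch = 1), filled with ones.
  std::vector<float> grad_output(OH * OW * C, 1.0f);
  std::vector<float> grad_input(IH * IW * C, 0.0f);

  // Same traversal as the kernel: sequential on H,W, accumulate over C.
  for (int64_t oh = 0; oh < OH; ++oh) {
    int64_t ih = replicate_index(oh, IH, pad_h);
    for (int64_t ow = 0; ow < OW; ++ow) {
      int64_t iw = replicate_index(ow, IW, pad_w);
      float* gi = grad_input.data() + (ih * IW + iw) * C;
      const float* go = grad_output.data() + (oh * OW + ow) * C;
      for (int64_t c = 0; c < C; ++c) {  // scalar stand-in for add_stub
        gi[c] += go[c];
      }
    }
  }

  // Each input pixel of the 2x2 input receives gradient from 4 output pixels.
  for (int64_t ih = 0; ih < IH; ++ih) {
    for (int64_t iw = 0; iw < IW; ++iw) {
      std::cout << grad_input[(ih * IW + iw) * C] << " ";  // prints: 4 4 4 4
    }
  }
  std::cout << "\n";
}

Two properties of the real kernel are visible here. Because the tensors are channels-last, the channels values at any spatial position are contiguous in memory, which is what allows add_stub to vectorize the innermost loop. And since several output positions can map to the same input position, the kernel accumulates into grad_input rather than assigning, which is why parallelism is placed on the batch dimension where writes cannot collide.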