unfolded2d_copy Function — pytorch Architecture
Architecture documentation for the unfolded2d_copy function template in Unfold2d.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/Unfold2d.cpp lines 228–327
// Unfolds one multi-plane 2-D input into its "column" layout.
//
// The destination `finput_data` is written as n_input_plane * kH * kW
// consecutive planes of output_height * output_width elements, ordered by
// (input plane, kh, kw). Element (y, x) of the plane for (nip, kh, kw) receives
//     input[nip][y * dH - padH + kh][x * dW - padW + kw]
// or zero when that coordinate lies outside the input (padded path only).
//
// Parameters:
//   input_data    - source, n_input_plane planes of input_height * input_width.
//   finput_data   - destination, fully overwritten as described above.
//   kH, kW        - kernel height / width.
//   dH, dW        - vertical / horizontal stride.
//   padH, padW    - vertical / horizontal zero padding.
//   n_input_plane - number of input planes.
//   input_height, input_width   - size of one input plane.
//   output_height, output_width - size of one destination plane.
//
// The work is split over the (plane, kh, kw) index with at::parallel_for; each
// index writes its own destination plane, so iterations never write the same
// memory.
//
// NOTE(review): when padH == 0 and padW == 0 no bounds checks are done at all,
// and in the padded dW == 1 fast path the right-hand zero margin is derived
// from kW/padW/kw rather than from input_width. Both presumably rely on the
// caller passing an output size consistent with the input size, kernel,
// stride and padding -- confirm against the call site.
template <typename scalar_t>
void unfolded2d_copy(
    const scalar_t* input_data,
    scalar_t* finput_data,
    int64_t kH,
    int64_t kW,
    int64_t dH,
    int64_t dW,
    int64_t padH,
    int64_t padW,
    int64_t n_input_plane,
    int64_t input_height,
    int64_t input_width,
    int64_t output_height,
    int64_t output_width) {
  // Loop-invariant sizes, computed once instead of once per (plane, kh, kw).
  const size_t out_plane_size =
      static_cast<size_t>(output_height) * output_width;
  const size_t in_plane_size = static_cast<size_t>(input_height) * input_width;
  const size_t out_row_bytes = sizeof(scalar_t) * output_width;
  const bool has_padding = padW > 0 || padH > 0;

  at::parallel_for(
      0, n_input_plane * kH * kW, 0, [&](int64_t start, int64_t end) {
        for (const auto k : c10::irange(start, end)) {
          // Decompose the flat index into (input plane, kernel row, kernel col).
          const int64_t nip = k / (kH * kW);
          const int64_t rest = k % (kH * kW);
          const int64_t kh = rest / kW;
          const int64_t kw = rest % kW;

          // k == nip * kH * kW + kh * kW + kw, so the destination plane for
          // this index is simply the k-th plane of finput_data.
          scalar_t* dst = finput_data + static_cast<size_t>(k) * out_plane_size;
          const scalar_t* src =
              input_data + static_cast<size_t>(nip) * in_plane_size;

          if (has_padding) {
            // Zero margins of a destination row for the dW == 1 fast path.
            // They depend only on kw, not on y.
            const int64_t lpad = std::max<int64_t>(0, padW - kw);
            const int64_t rpad = std::max<int64_t>(0, padW - (kW - kw - 1));
            const int64_t ncopy = output_width - rpad - lpad;
            // First source column read by the fast path. This equals
            // (kw - padW) + lpad, evaluated as an integer so that no
            // intermediate pointer is ever formed before the start of the
            // source row (kw - padW alone may be negative).
            const int64_t first_ix = std::max<int64_t>(0, kw - padW);

            for (int64_t y = 0; y < output_height; y++) {
              scalar_t* dst_row = dst + static_cast<size_t>(y) * output_width;
              const int64_t iy = y * dH - padH + kh;

              // Source row is entirely inside the vertical padding.
              if (iy < 0 || iy >= input_height) {
                memset(dst_row, 0, out_row_bytes);
                continue;
              }

              const scalar_t* src_row =
                  src + static_cast<size_t>(iy) * input_width;

              if (dW == 1) {
                // Unit stride: one contiguous copy framed by zero margins.
                if (ncopy <= 0) {
                  // The margins cover the whole row; nothing to copy.
                  memset(dst_row, 0, out_row_bytes);
                  continue;
                }
                if (lpad > 0) {
                  memset(dst_row, 0, sizeof(scalar_t) * lpad);
                }
                memcpy(
                    dst_row + lpad,
                    src_row + first_ix,
                    sizeof(scalar_t) * ncopy);
                if (rpad > 0) {
                  memset(
                      dst_row + (output_width - rpad),
                      0,
                      sizeof(scalar_t) * rpad);
                }
              } else {
                // General stride: handle each output element on its own.
                for (int64_t x = 0; x < output_width; x++) {
                  const int64_t ix = x * dW - padW + kw;
                  if (ix < 0 || ix >= input_width) {
                    memset(dst_row + x, 0, sizeof(scalar_t));
                  } else {
                    memcpy(dst_row + x, src_row + ix, sizeof(scalar_t));
                  }
                }
              }
            }
          } else {
            // No padding: every source coordinate is used without checks.
            for (int64_t y = 0; y < output_height; y++) {
              scalar_t* dst_row = dst + static_cast<size_t>(y) * output_width;
              const int64_t iy = y * dH + kh;
              // Start of the source row, already shifted to kernel column kw.
              const scalar_t* src_row =
                  src + static_cast<size_t>(iy) * input_width + kw;

              if (dW == 1) {
                memcpy(dst_row, src_row, out_row_bytes);
              } else {
                for (int64_t x = 0; x < output_width; x++) {
                  memcpy(dst_row + x, src_row + x * dW, sizeof(scalar_t));
                }
              }
            }
          }
        }
      });
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free