Unfold3dAccKernelImpl Function Template — PyTorch Architecture
Architecture documentation for the Unfold3dAccKernelImpl function template in Unfold3d.cpp from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/Unfold3d.cpp lines 355–431
// Accumulates the column buffer `src` back onto the volume `dst`.
//
// Layout, as implied by the indexing below:
//   dst : C consecutive volumes of X_D * X_H * X_W elements.
//   src : C * kernel_d * kernel_h * kernel_w consecutive planes of
//         Y_D * Y_H * Y_W elements, ordered (channel, kd, kh, kw).
//
// For every source element at (c, kd, kh, kw, yd, yh, yw) the destination
// coordinate along each axis is x = y * stride - pad + k. Elements whose
// destination coordinate fails the IsAGeZeroAndALtB(x, X_dim) test are
// skipped (presumably those falling into the padding region, i.e. outside
// [0, X_dim) -- confirm against the helper's definition); all others are
// added into dst[c][xd][xh][xw].
//
// When all three paddings are zero the work is forwarded unchanged to
// Unfold3dZeroPaddingAccKernelImpl<T>.
template <typename T>
void Unfold3dAccKernelImpl(
    int64_t C,
    int64_t X_D,
    int64_t X_H,
    int64_t X_W,
    int64_t Y_D,
    int64_t Y_H,
    int64_t Y_W,
    int64_t kernel_d,
    int64_t kernel_h,
    int64_t kernel_w,
    int64_t stride_d,
    int64_t stride_h,
    int64_t stride_w,
    int64_t pad_d,
    int64_t pad_h,
    int64_t pad_w,
    const T* src,
    T* dst) {
  const bool has_padding = pad_d != 0 || pad_h != 0 || pad_w != 0;
  if (!has_padding) {
    // No padding: delegate to the dedicated zero-padding kernel.
    Unfold3dZeroPaddingAccKernelImpl<T>(
        C,
        X_D,
        X_H,
        X_W,
        Y_D,
        Y_H,
        Y_W,
        kernel_d,
        kernel_h,
        kernel_w,
        stride_d,
        stride_h,
        stride_w,
        src,
        dst);
    return;
  }

  const int64_t X_size = X_D * X_H * X_W;
  const int64_t Y_size = Y_D * Y_H * Y_W;
  const int64_t kernel_size = kernel_d * kernel_h * kernel_w;

  at::parallel_for(0, C, 0, [=](int64_t begin, int64_t end) {
    // Clear the destination volumes of this channel range before
    // accumulating. This zeroes raw bytes, so it relies on an all-zero
    // byte pattern being T's zero value.
    std::memset(dst + begin * X_size, 0, (end - begin) * X_size * sizeof(T));

    for (int64_t channel = begin; channel < end; ++channel) {
      T* const out_volume = dst + channel * X_size;

      for (int64_t kd = 0; kd < kernel_d; ++kd) {
        for (int64_t kh = 0; kh < kernel_h; ++kh) {
          for (int64_t kw = 0; kw < kernel_w; ++kw) {
            // Index of the source plane for (channel, kd, kh, kw).
            const int64_t plane =
                channel * kernel_size + (kd * kernel_h + kh) * kernel_w + kw;
            const T* const in_plane = src + plane * Y_size;

            for (int64_t yd = 0; yd < Y_D; ++yd) {
              const int64_t xd = yd * stride_d - pad_d + kd;
              if (!IsAGeZeroAndALtB(xd, X_D)) {
                continue;
              }

              for (int64_t yh = 0; yh < Y_H; ++yh) {
                const int64_t xh = yh * stride_h - pad_h + kh;
                if (!IsAGeZeroAndALtB(xh, X_H)) {
                  continue;
                }

                // Both (xd, xh) and (yd, yh) are settled here, so resolve
                // the row bases once and index only by the W coordinate.
                T* const out_row = out_volume + (xd * X_H + xh) * X_W;
                const T* const in_row = in_plane + (yd * Y_H + yh) * Y_W;

                for (int64_t yw = 0; yw < Y_W; ++yw) {
                  const int64_t xw = yw * stride_w - pad_w + kw;
                  if (!IsAGeZeroAndALtB(xw, X_W)) {
                    continue;
                  }
                  out_row[xw] += in_row[yw];
                }
              }
            }
          }
        }
      }
    }
  });
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free