PackedConvWeight&lt;kSpatialDim&gt;::prepack — PyTorch Architecture
Architecture documentation for the prepack method of the PackedConvWeight class template in qconv_prepack.cpp from the pytorch codebase. (kSpatialDim is a non-type template parameter selecting 2D vs. 3D convolution, not a class.)
Entity Profile
Source Code
aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp lines 26–187
// Packs a quantized (qint8) convolution weight tensor into the FBGEMM layout
// and bundles it, together with the conv hyper-parameters, bias, per-channel
// column offsets, scales and zero points, into a ConvPackedParamsBase that
// quantized conv ops consume at run time.
//
// Args:
//   weight:         quantized weight tensor; PyTorch layout is
//                   {out_c, in_c/groups, kSpatialDims...} for conv and
//                   {in_c, out_c/groups, kSpatialDims...} for conv transpose.
//   bias:           optional 1-D float bias with output_channels elements.
//   stride/padding/output_padding/dilation: per-spatial-dim conv parameters
//                   (output_padding only validated when transpose is true).
//   groups:         number of convolution groups.
//   transpose:      true for ConvTranspose weights.
//
// Returns: intrusive_ptr to the packed parameter object.
// Throws (via TORCH_CHECK) on dimension/size mismatches, on per-channel
// quantization combined with transpose, and on unsupported qschemes.
template <int kSpatialDim>
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeight<
    kSpatialDim>::
    prepack(
        at::Tensor weight,
        std::optional<at::Tensor> bias,
        torch::List<int64_t> stride,
        torch::List<int64_t> padding,
        torch::List<int64_t> output_padding,
        torch::List<int64_t> dilation,
        int64_t groups,
        bool transpose) {
  TORCH_CHECK(
      weight.ndimension() == kSpatialDim + 2,
      "Weights are expected to have ",
      kSpatialDim + 2,
      " dimensions");
  TORCH_CHECK(
      stride.size() == kSpatialDim,
      "stride should contain ",
      kSpatialDim,
      " elements for ",
      kSpatialDim,
      "D convolution.");
  TORCH_CHECK(
      padding.size() == kSpatialDim,
      "Specify front/top/left padding only. "
      "end/bottom/right padding assumed to be equal to front/top/left");
  TORCH_CHECK(
      !transpose || output_padding.size() == kSpatialDim,
      "quantized::conv_prepack: Specify top/left output padding "
      "only. bottom/right padding assumed to be equal to top/left");
  TORCH_CHECK(
      dilation.size() == kSpatialDim,
      "dilation should contain ",
      kSpatialDim,
      " elements for ",
      kSpatialDim,
      "D convolution.");
  // For transposed conv the channel axes of the weight tensor are swapped:
  // dim 0 is input channels and dim 1 is output channels per group.
  const int input_channels = transpose ? weight.size(0)
                                       : weight.size(1) * groups;
  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  const int output_channels = transpose ? weight.size(1) * groups
                                        // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
                                        : weight.size(0);
  // 2D convs have no depth dimension; treat kernel depth as 1 so the
  // inner_size computation below works uniformly for 2D and 3D.
  const int kernel_d = kSpatialDim == 2 ? 1 : weight.size(2);
  const int kernel_h = weight.size(kSpatialDim);
  const int kernel_w = weight.size(kSpatialDim + 1);
  // mini-batch doesn't have any impact on how we pack weights
  // so we pass it as 1
  // Input image height/width also don't have any impact on how we pack
  // weights so we can pass any values
  const fbgemm::conv_param_t<kSpatialDim> conv_p =
      at::native::fbgemm_utils::MakeFbgemmConvParam<kSpatialDim>(
          1, // dummy batch size
          input_channels,
          output_channels,
          kSpatialDim == 2 ? std::vector<int>{28, 28} // dummy image size
                           : std::vector<int>{28, 28, 28},
          groups,
          kSpatialDim == 2 ? std::vector<int>{kernel_h, kernel_w}
                           : std::vector<int>{kernel_d, kernel_h, kernel_w},
          std::vector<int>(stride.begin(), stride.end()),
          std::vector<int>(padding.begin(), padding.end()),
          std::vector<int>(dilation.begin(), dilation.end()),
          std::vector<int>(output_padding.begin(), output_padding.end()),
          transpose);
  const auto qtype = weight.qscheme();
  std::vector<int32_t> zero_points;
  if (qtype == c10::kPerTensorAffine) {
    zero_points = {static_cast<int32_t>(weight.q_zero_point())};
  } else if (qtype == c10::kPerChannelAffine) {
    TORCH_CHECK(
        !transpose,
        "Per Channel Quantization is currently disabled for transposed conv");
    // Hoist the accessor out of the loop: q_per_channel_zero_points()
    // materializes a new tensor on every call.
    const at::Tensor per_channel_zero_points =
        weight.q_per_channel_zero_points();
    zero_points.resize(output_channels);
    for (const auto i : c10::irange(output_channels)) {
      zero_points[i] = per_channel_zero_points[i].item<int32_t>();
    }
  } else {
    TORCH_CHECK(false, "Unsupported qscheme: ", toString(qtype));
  }
  // FBGEMM expects weights to be in channels last
  // TODO: Change this when ChannelsLast3d is ready.
  // FBGEMM needs G OC/G kDim0 ... kDimN IC/G
  // for both conv and conv transpose
  // but PyTorch lays them out as {out_c, in_c/groups, kH, kW}
  // (or for ConvTranspose {in_c, out_c/groups, kH, kW})
  const at::Tensor weight_nhwc =
      at::native::fbgemm_utils::ConvertConvWeightsToChannelLastTensor<
          kSpatialDim>(weight, groups, transpose);
  const int8_t* weight_data_int8 =
      reinterpret_cast<int8_t*>(weight_nhwc.data_ptr<c10::qint8>());
  std::vector<int32_t> col_offsets(output_channels);
  // compute column offsets (Similar to
  // fbgemm::col_offsets_with_zero_pt_s8acc32_ref) please note that offsets
  // include the sum of columns as well as the scalar term weight_zero_point *
  // KDim
  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  const int input_channels_per_group = input_channels / groups;
  // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
  const int output_channels_per_group = output_channels / groups;
  // Number of weight elements contributing to each output channel.
  const int inner_size =
      kernel_d * kernel_h * kernel_w * input_channels_per_group;
  for (const auto g : c10::irange(groups)) {
    for (const auto i : c10::irange(output_channels_per_group)) {
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
      const int c = g * output_channels_per_group + i;
      int32_t sum = 0;
      for (const auto j : c10::irange(inner_size)) {
        sum += static_cast<int32_t>(weight_data_int8[c * inner_size + j]);
      }
      if (qtype == c10::kPerTensorAffine) {
        col_offsets[c] = sum - zero_points[0] * inner_size;
      } else {
        col_offsets[c] = sum - zero_points[c] * inner_size;
      }
    }
  }
  std::vector<float> scales;
  // qtype was already validated above, so it is one of the two schemes here.
  if (qtype == c10::kPerTensorAffine) {
    scales = {static_cast<float>(weight.q_scale())};
  } else if (qtype == c10::kPerChannelAffine) {
    // Hoist the accessor out of the loop (same reason as zero points above).
    const at::Tensor per_channel_scales = weight.q_per_channel_scales();
    scales.resize(output_channels);
    for (const auto i : c10::irange(output_channels)) {
      scales[i] = per_channel_scales[i].item<float>();
    }
  }
  std::optional<at::Tensor> bias_contig;
  if (bias.has_value()) {
    at::Tensor bias_vec = bias.value();
    TORCH_CHECK(bias_vec.dim() == 1, "bias should be a vector (1D Tensor)");
    // Use TORCH_CHECK's variadic message args (consistent with the checks
    // above); produces the same text as the previous string concatenation.
    TORCH_CHECK(
        bias_vec.size(0) == output_channels,
        "bias should have K elements: ",
        output_channels);
    bias_contig = bias->contiguous();
  }
  auto ret_ptr = c10::make_intrusive<PackedConvWeight<kSpatialDim>>(
      PackedConvWeight<kSpatialDim>{
          std::make_unique<fbgemm::PackWeightsForConv<kSpatialDim>>(
              conv_p, weight_data_int8),
          bias_contig,
          stride,
          padding,
          output_padding,
          dilation,
          groups,
          transpose,
          col_offsets,
          kSpatialDim == 2 ? std::vector<int64_t>{kernel_h, kernel_w}
                           : std::vector<int64_t>{kernel_d, kernel_h, kernel_w},
          scales,
          zero_points,
          qtype});
  return ret_ptr;
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free