QLinearPackWeightFp16 Class — PyTorch Architecture
Architecture documentation for the QLinearPackWeightFp16 class in qlinear_prepack.cpp from the PyTorch codebase.
Entity Profile
QLinearPackWeightFp16 is a stateless dispatcher for fp16 weight prepacking in dynamically quantized linear layers. Its single static run method inspects the globally selected quantization engine and routes to FBGEMM's PackedLinearWeightFp16::prepack for the FBGEMM and X86 engines; QNNPACK and ONEDNN do not support this operation and fail with descriptive errors.
Source Code
aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp lines 616–653
class QLinearPackWeightFp16 final {
 public:
  static c10::intrusive_ptr<LinearPackedParamsBase> run(
      at::Tensor weight,
      std::optional<Tensor> bias) {
    auto& ctx = at::globalContext();
#ifdef USE_FBGEMM
    // temporarily convert weight back to fp32, needs to be fixed
    // after fbgemm fixes the interface for their prepacking op (take fp16 input)
    weight = weight.to(ScalarType::Float);
    if (ctx.qEngine() == at::QEngine::FBGEMM ||
        ctx.qEngine() == at::QEngine::X86) {
      return PackedLinearWeightFp16::prepack(
          std::move(weight), std::move(bias));
    }
#endif // USE_FBGEMM
#ifdef USE_PYTORCH_QNNPACK
    if (ctx.qEngine() == at::QEngine::QNNPACK) {
      TORCH_CHECK(
          false,
          "quantized::linear_prepack_fp16 is currently "
          "not supported by QNNPACK");
    }
#endif // USE_PYTORCH_QNNPACK
#if AT_MKLDNN_ENABLED()
    if (ctx.qEngine() == at::QEngine::ONEDNN) {
      TORCH_CHECK(
          false,
          "quantized::linear_prepack_fp16 is currently "
          "not supported by ONEDNN");
    }
#endif // #if AT_MKLDNN_ENABLED()
    TORCH_CHECK(
        false,
        "Didn't find engine for operation quantized::linear_prepack_fp16 ",
        toString(ctx.qEngine()));
  }
};
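The run method above follows a pattern common in ATen's quantized kernels: runtime dispatch on the globally selected quantization engine, with each branch additionally guarded by a compile-time feature flag so that backends absent from the build contribute no code. The following standalone sketch isolates that pattern; Engine, Context, and prepackFp16 are illustrative stand-ins, not PyTorch APIs.

#include <stdexcept>
#include <string>

// Stand-in for at::QEngine.
enum class Engine { FBGEMM, X86, QNNPACK, ONEDNN };

// Stand-in for at::globalContext(), which exposes the active engine.
struct Context {
  Engine qEngine() const { return engine_; }
  Engine engine_ = Engine::FBGEMM;
};

Context& globalContext() {
  static Context ctx;
  return ctx;
}

#define USE_FBGEMM 1 // stand-in for the real build-time flag

std::string prepackFp16() {
  auto& ctx = globalContext();
#ifdef USE_FBGEMM
  // Supported engines take the fast path; the FBGEMM branch serves both
  // the FBGEMM and X86 engine selections, exactly as in the class above.
  if (ctx.qEngine() == Engine::FBGEMM || ctx.qEngine() == Engine::X86) {
    return "packed-with-fbgemm";
  }
#endif
  // Engines that are compiled in but lack an implementation fail loudly,
  // mirroring the TORCH_CHECK(false, ...) branches above.
  throw std::runtime_error(
      "Didn't find engine for operation linear_prepack_fp16");
}

int main() {
  // With the default engine (FBGEMM), the prepack succeeds.
  return prepackFp16() == "packed-with-fbgemm" ? 0 : 1;
}

This design keeps a single entry point per operator while letting unsupported backends surface a clear error rather than a silent fallback. Note one wrinkle in the real implementation: the fp16 weight is converted back to fp32 before packing, a workaround flagged in the source comment until FBGEMM's prepacking interface accepts fp16 input directly.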