QLinearOnednn Class — PyTorch Architecture
Architecture documentation for the QLinearOnednn class in qlinear.cpp from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/quantized/cpu/qlinear.cpp lines 1578–1640
// Dispatch entry points for int8 linear (matmul) using oneDNN with a
// prepacked weight. Activations are plain int8 CPU tensors (not QTensors);
// the weight is an int8 tensor already packed for the MkldnnCPU backend.
// Both entry points forward to linear_int8_with_onednn_weight (defined
// earlier in this file); when the build has MKLDNN disabled they fail
// with TORCH_CHECK instead.
class QLinearOnednn final {
 public:
  // Quantized linear with only a unary post-op (e.g. "none", "relu").
  //
  // act/act_scale/act_zero_point: int8 activation and its quantization params.
  // onednn_weight/weight_scales/weight_zero_points: prepacked weight and its
  //   (possibly per-channel) quantization params.
  // bias: optional float bias.
  // output_scale/output_zero_point/output_dtype: requantization params for
  //   the output; output_dtype selects e.g. fp32/bf16 output instead of int8.
  // post_op_name/post_op_args/post_op_algorithm: unary post-op description.
  static Tensor run_pointwise(
      Tensor act, // int8 CPU tensor, not QTensor
      double act_scale,
      int64_t act_zero_point,
      Tensor onednn_weight, // int8 tensor from MkldnnCPU
      Tensor weight_scales,
      Tensor weight_zero_points,
      std::optional<Tensor> bias,
      double output_scale,
      int64_t output_zero_point,
      std::optional<c10::ScalarType> output_dtype,
      std::string_view post_op_name,
      torch::List<std::optional<at::Scalar>> post_op_args,
      std::string_view post_op_algorithm) {
#if AT_MKLDNN_ENABLED()
    // No binary post-op here: pass an empty extra input with neutral
    // scale/zero-point/alpha. Plain locals (not function-local statics):
    // lvalues bind to any parameter form the callee uses, without shared
    // mutable state or a static-init guard.
    std::optional<Tensor> other = std::nullopt;
    constexpr std::string_view binary_post_op = "none";
    return linear_int8_with_onednn_weight(
        act, act_scale, act_zero_point,
        onednn_weight, weight_scales, weight_zero_points,
        bias, output_scale, output_zero_point, output_dtype,
        other, /*other scale*/1.0, /*other zp*/0,
        binary_post_op, /*binary alpha*/1.0,
        post_op_name, post_op_args, post_op_algorithm
    );
#endif
    TORCH_CHECK(false, "Unimplemented (int8 linear with packed weight and bias)");
  }

  // Quantized linear with a binary post-op (e.g. "sum", "add") fused with an
  // optional unary post-op (e.g. "relu"). Same contract as run_pointwise plus:
  //
  // other/other_scale/other_zero_point: optional extra input for the binary
  //   post-op and its quantization params.
  // binary_post_op/binary_alpha: binary op name and its alpha scaling factor.
  // unary_post_op/unary_post_op_args/unary_post_op_algorithm: unary post-op
  //   applied after the binary one.
  static Tensor run_pointwise_binary(
      Tensor act, // int8 CPU tensor, not QTensor
      double act_scale,
      int64_t act_zero_point,
      Tensor onednn_weight, // int8 tensor from MkldnnCPU
      Tensor weight_scales,
      Tensor weight_zero_points,
      std::optional<Tensor> other, // extra input for binary post-op
      std::optional<Tensor> bias,
      double output_scale,
      int64_t output_zero_point,
      std::optional<c10::ScalarType> output_dtype,
      double other_scale,
      int64_t other_zero_point,
      std::string_view binary_post_op, // e.g. "none", "sum", "add"
      double binary_alpha,
      std::string_view unary_post_op, // e.g. "none", "relu"
      torch::List<std::optional<at::Scalar>> unary_post_op_args,
      std::string_view unary_post_op_algorithm) {
#if AT_MKLDNN_ENABLED()
    return linear_int8_with_onednn_weight(
        act, act_scale, act_zero_point,
        onednn_weight, weight_scales, weight_zero_points,
        bias, output_scale, output_zero_point, output_dtype,
        other, other_scale, other_zero_point,
        binary_post_op, binary_alpha,
        unary_post_op, unary_post_op_args, unary_post_op_algorithm
    );
#endif
    TORCH_CHECK(false, "Unimplemented (int8 linear with packed weight and bias)");
  }
};
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free