QLinearOnednn Class — PyTorch Architecture
Architecture documentation for the QLinearOnednn class in qlinear.cpp from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/quantized/cpu/qlinear.cpp lines 1578–1640
// Dispatch entry points for int8 linear (matmul) using oneDNN with a
// prepacked weight. Activations are plain int8 CPU tensors (not QTensors);
// the weight is an int8 tensor already packed for the MkldnnCPU backend.
// Both entry points forward to linear_int8_with_onednn_weight (defined
// earlier in this file); when the build has MKLDNN disabled they fail
// with TORCH_CHECK instead.
class QLinearOnednn final {
 public:
  // Quantized linear with only a unary post-op (e.g. "none", "relu").
  //
  // act/act_scale/act_zero_point: int8 activation and its quantization params.
  // onednn_weight/weight_scales/weight_zero_points: prepacked weight and its
  //   (possibly per-channel) quantization params.
  // bias: optional float bias.
  // output_scale/output_zero_point/output_dtype: requantization params for
  //   the output; output_dtype selects e.g. fp32/bf16 output instead of int8.
  // post_op_name/post_op_args/post_op_algorithm: unary post-op description.
  static Tensor run_pointwise(
      Tensor act, // int8 CPU tensor, not QTensor
      double act_scale,
      int64_t act_zero_point,
      Tensor onednn_weight, // int8 tensor from MkldnnCPU
      Tensor weight_scales,
      Tensor weight_zero_points,
      std::optional<Tensor> bias,
      double output_scale,
      int64_t output_zero_point,
      std::optional<c10::ScalarType> output_dtype,
      std::string_view post_op_name,
      torch::List<std::optional<at::Scalar>> post_op_args,
      std::string_view post_op_algorithm) {
#if AT_MKLDNN_ENABLED()
    // No binary post-op here: pass an empty extra input with neutral
    // scale/zero-point/alpha. Plain locals (not function-local statics):
    // lvalues bind to any parameter form the callee uses, without shared
    // mutable state or a static-init guard.
    std::optional<Tensor> other = std::nullopt;
    constexpr std::string_view binary_post_op = "none";
    return linear_int8_with_onednn_weight(
        act, act_scale, act_zero_point,
        onednn_weight, weight_scales, weight_zero_points,
        bias, output_scale, output_zero_point, output_dtype,
        other, /*other scale*/1.0, /*other zp*/0,
        binary_post_op, /*binary alpha*/1.0,
        post_op_name, post_op_args, post_op_algorithm
    );
#endif
    TORCH_CHECK(false, "Unimplemented (int8 linear with packed weight and bias)");
  }

  // Quantized linear with a binary post-op (e.g. "sum", "add") fused with an
  // optional unary post-op (e.g. "relu"). Same contract as run_pointwise plus:
  //
  // other/other_scale/other_zero_point: optional extra input for the binary
  //   post-op and its quantization params.
  // binary_post_op/binary_alpha: binary op name and its alpha scaling factor.
  // unary_post_op/unary_post_op_args/unary_post_op_algorithm: unary post-op
  //   applied after the binary one.
  static Tensor run_pointwise_binary(
      Tensor act, // int8 CPU tensor, not QTensor
      double act_scale,
      int64_t act_zero_point,
      Tensor onednn_weight, // int8 tensor from MkldnnCPU
      Tensor weight_scales,
      Tensor weight_zero_points,
      std::optional<Tensor> other, // extra input for binary post-op
      std::optional<Tensor> bias,
      double output_scale,
      int64_t output_zero_point,
      std::optional<c10::ScalarType> output_dtype,
      double other_scale,
      int64_t other_zero_point,
      std::string_view binary_post_op, // e.g. "none", "sum", "add"
      double binary_alpha,
      std::string_view unary_post_op, // e.g. "none", "relu"
      torch::List<std::optional<at::Scalar>> unary_post_op_args,
      std::string_view unary_post_op_algorithm) {
#if AT_MKLDNN_ENABLED()
    return linear_int8_with_onednn_weight(
        act, act_scale, act_zero_point,
        onednn_weight, weight_scales, weight_zero_points,
        bias, output_scale, output_zero_point, output_dtype,
        other, other_scale, other_zero_point,
        binary_post_op, binary_alpha,
        unary_post_op, unary_post_op_args, unary_post_op_algorithm
    );
#endif
    TORCH_CHECK(false, "Unimplemented (int8 linear with packed weight and bias)");
  }
};
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free