PackedConvWeightsQnnp Struct (templated on kSpatialDim) — pytorch Architecture

Architecture documentation for the PackedConvWeightsQnnp struct template (parameterized by kSpatialDim) in QnnpackUtils.h from the pytorch codebase.
Class c
Entity Profile

Source Code

aten/src/ATen/native/quantized/cpu/QnnpackUtils.h lines 116–381
/// Packed convolution weights together with the QNNPACK operator state that is
/// built from them.
///
/// `kSpatialDim` is the number of spatial dimensions. The constructor reads the
/// kernel/stride/dilation/padding lists at `kSpatialDim - 2` (height) and
/// `kSpatialDim - 1` (width), and at index 0 (depth) only when
/// `kSpatialDim == 3`, so it requires `kSpatialDim >= 2`.
template <int kSpatialDim = 2>
struct PackedConvWeightsQnnp : public ConvPackedParamsBase<kSpatialDim> {
  /// Stores the packed weights and convolution hyper-parameters, picks a
  /// QNNPACK micro-kernel type, and allocates both the zero-initialized
  /// `pytorch_qnnp_operator` and the zero-padding buffer it points at.
  ///
  /// @param w               Pre-packed QNNPACK weights; ownership is taken.
  /// @param orig_weight     Original weight tensor; its sizes along dims 0 and
  ///                        1 are used to derive the per-group channel counts.
  /// @param bias            Bias tensor, stored as-is.
  /// @param stride          Per-dimension strides.
  /// @param padding         Per-dimension input padding.
  /// @param output_padding  Per-dimension output padding; only read when
  ///                        `transpose` is true.
  /// @param dilation        Per-dimension dilation.
  /// @param groups          Number of convolution groups (used as a divisor).
  /// @param transpose       Whether this is a transposed convolution.
  /// @param input_scale     Optional input scale, stored as-is.
  /// @param kernel          Kernel extent per spatial dimension.
  /// @param w_scale         Weight scale tensor, stored as-is.
  /// @param w_zps           Weight zero points; moved into the object.
  /// @param is_per_channel  Whether the weights are quantized per channel.
  ///
  /// Raises through TORCH_INTERNAL_ASSERT when per-channel weights meet the
  /// XZP GEMM kernel, or when either allocation fails.
  PackedConvWeightsQnnp(
      std::unique_ptr<qnnpack::PrePackConvWeights> w,
      at::Tensor orig_weight,
      at::Tensor bias,
      torch::List<int64_t> stride,
      torch::List<int64_t> padding,
      torch::List<int64_t> output_padding,
      torch::List<int64_t> dilation,
      int64_t groups,
      bool transpose,
      std::optional<double> input_scale,
      std::vector<int64_t> kernel,
      at::Tensor w_scale,
      std::vector<uint8_t>&& w_zps,
      bool is_per_channel)
      : w(std::move(w)),
        orig_weight(std::move(orig_weight)),
        bias(std::move(bias)),
        stride_(std::move(stride)),
        padding_(std::move(padding)),
        output_padding_(std::move(output_padding)),
        dilation_(std::move(dilation)),
        groups_(groups),
        transpose_(transpose),
        is_per_channel_(is_per_channel),
        input_scale(input_scale),
        kernel_(std::move(kernel)),
        w_scales(std::move(w_scale)),
        w_zero_points(std::move(w_zps)) {
    const bool any_padding = std::any_of(
        padding_.begin(), padding_.end(), [](const auto& e) { return e != 0; });
    // Seed with int64_t: std::accumulate carries the running value in the type
    // of the seed, so a plain `1` would keep the product of the int64_t kernel
    // extents in an `int`.
    const size_t kernel_size = std::accumulate(
        kernel_.begin(), kernel_.end(), int64_t{1}, std::multiplies<>());

    // The parameters were moved from above, so the tensor must be read through
    // the member (`this->orig_weight`). Which weight dimension carries the
    // grouped channel count depends on `transpose`.
    const size_t group_input_channels = transpose
        ? this->orig_weight.size(0) / groups
        : this->orig_weight.size(1);
    const size_t group_output_channels = transpose
        ? this->orig_weight.size(1)
        : this->orig_weight.size(0) / groups;

    const size_t kernel_depth = kSpatialDim == 3 ? kernel_[0] : 1;
    const size_t kernel_height = kernel_[kSpatialDim - 2];
    const size_t kernel_width = kernel_[kSpatialDim - 1];

    // Select the micro-kernel family. Transposed convolutions always use the
    // generic conv kernel.
    pytorch_qnnp_ukernel_type ukernel_type;
    if (transpose_) {
      ukernel_type = pytorch_qnnp_ukernel_type_conv;
    } else {
      ukernel_type = pytorch_qnnp_ukernel_type_none;

      // Depthwise kernels are only chosen for 3x3 / 5x5 (2D) or 3x3x3 (3D)
      // kernels with exactly one input and one output channel per group.
      const bool has_depthwise_dimensions =
          (kSpatialDim == 2 &&
           ((kernel_height == 3 && kernel_width == 3) ||
            (kernel_height == 5 && kernel_width == 5))) ||
          (kSpatialDim == 3 && kernel_height == 3 && kernel_width == 3 &&
           kernel_depth == 3);
      const bool has_depthwise_grouping =
          group_input_channels == 1 && group_output_channels == 1 && groups > 1;

      if (has_depthwise_dimensions && has_depthwise_grouping) {
        ukernel_type = pytorch_qnnp_ukernel_type_dwconv;
      } else if (
          kernel_size == 1 &&
          std::all_of(
              stride_.begin(),
              stride_.end(),
              [](const auto& e) { return e == 1; }) &&
          !any_padding) {
        // `>= SIZE_MAX` can only hold when the count equals SIZE_MAX, so the
        // plain GEMM kernel is what gets picked in practice.
        // NOTE(review): looks like a deliberate way to keep the XZP path
        // disabled -- confirm before changing the threshold.
        ukernel_type = group_input_channels >= SIZE_MAX
            ? pytorch_qnnp_ukernel_type_xzp_gemm
            : pytorch_qnnp_ukernel_type_gemm;
      } else {
        ukernel_type = pytorch_qnnp_ukernel_type_conv;
      }
    }

    TORCH_INTERNAL_ASSERT(
        !(is_per_channel && ukernel_type == pytorch_qnnp_ukernel_type_xzp_gemm),
        "Per channel quantized weights are not supported for XZP kernels");

    // calloc so that every operator field not assigned below starts at zero.
    pytorch_qnnp_operator_t convolution = static_cast<pytorch_qnnp_operator_t>(
        calloc(1, sizeof(struct pytorch_qnnp_operator)));
    // TORCH_INTERNAL_ASSERT concatenates its message arguments rather than
    // applying printf formatting, so the byte count is passed as its own
    // argument instead of through a "%zu" placeholder.
    TORCH_INTERNAL_ASSERT(
        convolution != nullptr,
        "failed to allocate ",
        sizeof(struct pytorch_qnnp_operator),
        " bytes for pytorch_qnnp_operator structure");

    // From here on the operator is owned by convolution_op; `convolution` is
    // only kept as a convenient non-owning alias for filling in the fields.
    convolution_op =
        std::unique_ptr<pytorch_qnnp_operator, QnnpackOperatorDeleter>(
            convolution);

    // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
    convolution->ukernel_type = ukernel_type;
    convolution->groups = groups;
    convolution->group_input_channels = group_input_channels;
    convolution->group_output_channels = group_output_channels;
    convolution->kernel_depth = kernel_depth;
    convolution->kernel_height = kernel_height;
    convolution->kernel_width = kernel_width;
    convolution->stride_depth = kSpatialDim == 3 ? stride_[0] : 1;
    convolution->stride_height = stride_[kSpatialDim - 2];
    convolution->stride_width = stride_[kSpatialDim - 1];
    convolution->dilation_depth = kSpatialDim == 3 ? dilation_[0] : 1;
    convolution->dilation_height = dilation_[kSpatialDim - 2];
    convolution->dilation_width = dilation_[kSpatialDim - 1];
    convolution->input_padding_height = padding_[kSpatialDim - 2];
    convolution->input_padding_width = padding_[kSpatialDim - 1];
    convolution->input_padding_depth = kSpatialDim == 3 ? padding_[0] : 0;
    convolution->per_channel = is_per_channel_;
    convolution->transpose = transpose_;

    // Round the per-group input channel count up to a multiple of kr.
    // NOTE(review): the `& -kr` mask assumes kr is a power of two -- confirm.
    const uint32_t kr = pytorch_qnnp_params.q8conv.kr;
    const size_t k_stride = (group_input_channels + (kr - 1)) & -kr;

    size_t zero_size = sizeof(uint8_t) * k_stride;
    size_t zero_offset = 0;

    if (transpose_) {
      // Index the trailing (height, width) entries the same way stride_,
      // dilation_ and padding_ are indexed above. For kSpatialDim == 2 these
      // are the same elements as [0] and [1].
      convolution->adjustment_width = output_padding_[kSpatialDim - 1];
      convolution->adjustment_height = output_padding_[kSpatialDim - 2];
      // With fewer than 8 channels the buffer gets 8 extra bytes and the zero
      // pointer is placed 8 bytes into it.
      if (group_input_channels < 8) {
        zero_size += 8;
        zero_offset = 8;
      }
    } else {
      zero_buffer_size = 0;
      if (any_padding) {
        zero_size = 0;
        zero_offset = 0;
        if (ukernel_type == pytorch_qnnp_ukernel_type_dwconv) {
          // Depthwise: size the buffer by the group count rounded up to cr.
          const uint32_t cr = pytorch_qnnp_params.q8dw9.cr;
          const size_t group_stride = (groups + (cr - 1)) & -cr;
          if (groups >= 8) {
            zero_size = sizeof(uint8_t) * group_stride;
            zero_offset = 0;
          } else {
            zero_size = sizeof(uint8_t) * group_stride + 8;
            zero_offset = sizeof(uint8_t) * 8;
          }
        } else if (
            ukernel_type == pytorch_qnnp_ukernel_type_conv ||
            ukernel_type == pytorch_qnnp_ukernel_type_gemm) {
          if (group_input_channels >= 8) {
            zero_size = sizeof(uint8_t) * k_stride;
            zero_offset = 0;
          } else {
            zero_size = sizeof(uint8_t) * k_stride + 8;
            zero_offset = 8;
          }
        }
      }
    }

    // NOLINTNEXTLINE(clang-analyzer-optin.portability.UnixAPI)
    void* zero_buffer = malloc(zero_size);
    // Do NOT delete `convolution` by hand on failure: convolution_op already
    // owns it and its deleter runs when the throwing constructor unwinds, so a
    // manual delete here would release the operator a second time.
    TORCH_INTERNAL_ASSERT(
        zero_buffer != nullptr,
        "failed to allocate ",
        zero_size,
        " bytes for zero padding");
    // Need to set to input zero point
    // memset(zero_buffer, input_zero_point, zero_size);
    zero_buffer_size = zero_size;
    convolution->zero_buffer = zero_buffer;
    convolution->zero_pointer = static_cast<void*>(
        static_cast<uint8_t*>(zero_buffer) + zero_offset);
  }

  // Owning handle for the QNNPACK operator allocated in the constructor.
  std::unique_ptr<pytorch_qnnp_operator, QnnpackOperatorDeleter> convolution_op;
  #ifdef USE_XNNPACK
  xnnpack_operator xnnp_convolution_op;
  #endif  // USE_XNNPACK
  // Pre-packed weights handed to the constructor.
  std::unique_ptr<qnnpack::PrePackConvWeights> w;
  at::Tensor orig_weight;
  at::Tensor bias;
  torch::List<int64_t> stride_;
  torch::List<int64_t> padding_;
  torch::List<int64_t> output_padding_;
  torch::List<int64_t> dilation_;
  int64_t groups_;
  bool transpose_;
  bool is_per_channel_;
  std::optional<double> input_scale;
  std::vector<int64_t> kernel_;
  at::Tensor w_scales;
  std::vector<uint8_t> w_zero_points;
  // Not written by the constructor.
  std::vector<float> requantization_scales;
  // Size in bytes of the zero buffer allocated by the constructor. The buffer
  // contents are left uninitialized there (see the commented-out memset).
  size_t zero_buffer_size;

  at::Tensor apply(
      const at::Tensor& input,
      double output_scale,
      int64_t output_zero_point) override;

  at::Tensor apply_relu(
      const at::Tensor& input,
      double output_scale,
      int64_t output_zero_point) override;

  at::Tensor apply_dynamic(
      const at::Tensor& input,
      bool reduce_range=false) override;

  std::tuple<at::Tensor, std::optional<at::Tensor>> unpack() override;

  static c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> prepack(
      at::Tensor weight,
      std::optional<at::Tensor> bias,
      torch::List<int64_t> stride,
      torch::List<int64_t> padding,
      torch::List<int64_t> output_padding,
      torch::List<int64_t> dilation,
      int64_t groups,
      bool transpose);

  // Accessors returning the hyper-parameters captured at construction.
  torch::List<int64_t> stride() const override {
    return stride_;
  }

  torch::List<int64_t> padding() const override {
    return padding_;
  }

  torch::List<int64_t> output_padding() const override {
    return output_padding_;
  }

  torch::List<int64_t> dilation() const override {
    return dilation_;
  }

  int64_t groups() const override {
    return groups_;
  }

  bool transpose() const override {
    return transpose_;
  }

  bool per_channel() const {
    return is_per_channel_;
  }

 private:
  // NOTE(review): presumably serializes use of the QNNPACK operator; its
  // users are not visible in this struct definition -- confirm.
  std::mutex qnnp_mutex_;
  template <bool ReluFused>
  at::Tensor apply_impl(
      const at::Tensor& input,
      double output_scale,
      int64_t output_zero_point);

#ifdef USE_XNNPACK
  template <typename scalar_t, bool ReluFused>
  at::Tensor apply_impl_xnnp(
      const at::Tensor& input,
      double output_scale,
      int64_t output_zero_point);
#endif // USE_XNNPACK
};
Source

View on GitHub
Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free