Home / Class / _fake_quant_per_channel_cachemask_cpu_helper Class — pytorch Architecture

_fake_quant_per_channel_cachemask_cpu_helper Class — pytorch Architecture

Architecture documentation for the _fake_quant_per_channel_cachemask_cpu_helper class in QuantizedOpKernels.cpp from the pytorch codebase.

Entity Profile

Source Code

aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp lines 2793–2851

// Per-channel fake-quantization helper with a cached mask.
//
// Writes two outputs via the supplied TensorIterators:
//   * iter_mask: a bool per element, true iff the quantized value
//     Round(self / scale + zero_point) lands inside [quant_min, quant_max].
//   * iter: the fake-quantized value, i.e. the quantize->clamp->dequantize
//     round trip (clamp(Round(self / scale + zero_point)) - zero_point) * scale.
//
// The zero_point input (input index 2) may be either a floating-point or an
// integral tensor; the two cases round differently (lrintf vs. nearbyint),
// matching the corresponding affine-quantizer code paths.
template <typename SelfType>
void _fake_quant_per_channel_cachemask_cpu_helper(
    TensorIterator& iter,
    TensorIterator& iter_mask,
    const int64_t quant_min,
    const int64_t quant_max) {

  const auto& zero_point_dtype = iter.input_dtype(2);

  if (!at::isFloatingType(zero_point_dtype)) {
      // Integral zero_point path.
      // write mask
      cpu_kernel(iter_mask, [=](SelfType self, float scale, int32_t zero_point) -> bool {
        const float scale_recip = 1.0f / scale;
        const auto qval = static_cast<int64_t>(zero_point + std::nearbyint(self * scale_recip));
        return qval >= quant_min && qval <= quant_max;
      });

      // write fake_quant
      cpu_kernel(iter, [=](SelfType self, float scale, int32_t zero_point) -> SelfType {
        const float scale_recip = 1.0f / scale;
        const auto qval = static_cast<int64_t>(zero_point + std::nearbyint(self * scale_recip));
        // Clamp into the quantized range, then dequantize.
        const auto clamped = std::fmin(std::fmax(qval, quant_min), quant_max);
        // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
        return (clamped - zero_point) * scale;
      });
  } else {
    // Floating-point zero_point: quantize mirroring the affine quantizer
    // equation Xq = Round(Xf * inv_scale + zero_point), zero_point in float.
    AT_DISPATCH_FLOATING_TYPES_AND_HALF(zero_point_dtype, "fake_quantize_channel_cachemask_cpu_zero_point_handling", [&] {
      // write mask
      cpu_kernel(iter_mask, [=](SelfType self, float scale, scalar_t zero_point) -> bool {
        const float scale_recip = 1.0f / scale;
        const auto qval = std::lrintf(zero_point + self * scale_recip);
        return qval >= quant_min && qval <= quant_max;
      });

      // write fake_quant
      cpu_kernel(iter, [=](SelfType self, float scale, scalar_t zero_point) -> SelfType {
        const float scale_recip = 1.0f / scale;
        const auto qval = std::lrintf(zero_point + self * scale_recip);
        // Clamp into the quantized range, then dequantize.
        const auto clamped = std::fmin(std::fmax(qval, quant_min), quant_max);
        // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
        return (clamped - zero_point) * scale;
      });
    });
  }
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free