
_bincount_cpu_template Function Template — PyTorch Architecture

Architecture documentation for the _bincount_cpu_template function template in SummaryOps.cpp from the PyTorch codebase.

Entity Profile

Source Code

aten/src/ATen/native/SummaryOps.cpp lines 23–80

template <typename input_t, typename weights_t>
Tensor _bincount_cpu_template(
    const Tensor& self,
    const Tensor& weights,
    int64_t minlength) {
  if (minlength < 0) {
    TORCH_CHECK(false, "minlength should be >= 0");
  }
  if (self.dim() == 1 && self.numel() == 0) {
    return at::zeros({minlength}, kLong);
  }
  if (self.dim() != 1 || *self.min().data_ptr<input_t>() < 0) {
    TORCH_CHECK(false, "bincount only supports 1-d non-negative integral inputs.");
  }

  // Ensure max_val < 2 ^ 63 - 1 (9223372036854775807)
  auto max_val = *self.max().data_ptr<input_t>();
  if (max_val >= std::numeric_limits<int64_t>::max()) {
    TORCH_CHECK(false,
        "maximum value of input overflowed, it should be < ",
        std::numeric_limits<int64_t>::max(),
        " but got ",
        max_val
    );
  }

  bool has_weights = weights.defined();
  if (has_weights && (weights.dim() != 1 || weights.size(0) != self.size(0))) {
    TORCH_CHECK(false, "weights should be 1-d and have the same length as input");
  }

  Tensor output;
  int64_t self_size = self.size(0);
  int64_t nbins = static_cast<int64_t>(max_val) + 1L;
  nbins = std::max(nbins, minlength); // at least minlength # of bins

  const input_t* self_p = self.const_data_ptr<input_t>();
  if (has_weights) {
    output = at::zeros(
        {nbins},
        optTypeMetaToScalarType(weights.options().dtype_opt()),
        weights.options().layout_opt(),
        weights.options().device_opt(),
        weights.options().pinned_memory_opt());
    weights_t* output_p = output.data_ptr<weights_t>();
    const weights_t* weights_p = weights.const_data_ptr<weights_t>();
    for (const auto i : c10::irange(self_size)) {
      output_p[self_p[i]] += weights_p[i];
    }
  } else {
    output = at::zeros({nbins}, kLong);
    int64_t* output_p = output.data_ptr<int64_t>();
    for (const auto i : c10::irange(self_size)) {
      output_p[self_p[i]] += 1L;
    }
  }
  return output;
}
