Home / Class / cpu_padding Class — pytorch Architecture

cpu_padding Class — pytorch Architecture

Architecture documentation for the cpu_padding function template (catalogued here as a class) in PaddingKernel.cpp from the PyTorch codebase.

Entity Profile

Source Code

aten/src/ATen/native/cpu/PaddingKernel.cpp lines 130–231

template <typename scalar_t, typename PaddingType>
void cpu_padding(
    const Tensor& output_,
    const Tensor& input_,
    PaddingParams& p) {

  auto input = input_.contiguous();
  auto output = output_.contiguous();

  auto input_data = input.const_data_ptr<scalar_t>();
  auto output_data = output.data_ptr<scalar_t>();

  // fold nbatch and channels into single dimension for channels first.
  int64_t channels = p.nbatch * p.channels;

  int ndim = p.ndim;
  int64_t input_depth = ndim == 3 ? p.ishape[ndim - 3] : 1;
  int64_t input_height = ndim >=2 ? p.ishape[ndim - 2] : 1;
  int64_t input_width = p.ishape[ndim - 1];
  int64_t output_depth = ndim == 3 ? p.oshape[ndim - 3] : 1;
  int64_t output_height = ndim >= 2 ? p.oshape[ndim - 2] : 1;
  int64_t output_width = p.oshape[ndim - 1];
  int64_t pad_d = ndim == 3 ? p.pads[ndim - 3] : 0;
  int64_t pad_h = ndim >= 2 ? p.pads[ndim - 2] : 0;
  int64_t pad_w = p.pads[ndim - 1];
  int64_t offset_d = ndim == 3 ? p.offsets[ndim - 3] : 0;
  int64_t offset_h = ndim >= 2 ? p.offsets[ndim - 2] : 0;
  int64_t offset_w = p.offsets[ndim - 1];

  // do vectorized copy when output is overlapped with input on W,
  // only applies to positive padding
  auto loop = [=](scalar_t* out, const scalar_t* in, bool positive_padding) {
    if (positive_padding) {
      for (const auto ow : c10::irange(pad_w)) {
        int64_t iw = PaddingType::index(ow, input_width, pad_w, offset_w);
        out[ow] = in[iw];
      }
      copy_stub(out + pad_w, in, input_width);
      for (const auto ow : c10::irange(input_width + pad_w, output_width)) {
        int64_t iw = PaddingType::index(ow, input_width, pad_w, offset_w);
        out[ow] = in[iw];
      }
    } else {
      for (const auto ow : c10::irange(output_width)) {
        int64_t iw = PaddingType::index(ow, input_width, pad_w, offset_w);
        out[ow] = in[iw];
      }
    }
  };

  if (ndim == 1) {
    // parallel on N,C,W
    at::parallel_for(0, channels * output_width, 1, [&](int64_t begin, int64_t end) {
      int64_t c{0}, ow{0};
      data_index_init(begin, c, channels, ow, output_width);

      for (const auto i : c10::irange(begin, end)) {
        int64_t iw = PaddingType::index(ow, input_width, pad_w, offset_w);
        output_data[i] = input_data[c * input_width + iw];
        data_index_step(c, channels, ow, output_width);
      }
    });
  } else if (ndim == 2) {
    // parallel on N,C,H, vectorize on W
    at::parallel_for(0, channels * output_height, 1, [&](int64_t begin, int64_t end) {
      int64_t c{0}, oh{0};
      data_index_init(begin, c, channels, oh, output_height);

      for (const auto i : c10::irange(begin, end)) {
        int64_t ih = PaddingType::index(oh, input_height, pad_h, offset_h);
        scalar_t* output_ptr = output_data + i * output_width;
        const scalar_t* input_ptr = input_data + c * input_height * input_width + ih * input_width;

        loop(output_ptr, input_ptr, p.is_padding_positive_width);
        data_index_step(c, channels, oh, output_height);
      }
    });
  } else if (ndim == 3) {
    // parallel on N,C,D,H, vectorize on W
    at::parallel_for(0, channels * output_depth * output_height, 1, [&](int64_t begin, int64_t end) {
      int64_t c{0}, od{0}, oh{0};
      data_index_init(begin, c, channels, od, output_depth, oh, output_height);

      for (const auto i : c10::irange(begin, end)) {
        int64_t id = PaddingType::index(od, input_depth, pad_d, offset_d);
        int64_t ih = PaddingType::index(oh, input_height, pad_h, offset_h);
        scalar_t* output_ptr = output_data + i * output_width;
        const scalar_t* input_ptr = input_data + c * input_depth * input_height * input_width +
            id * input_height * input_width + ih * input_width;

        loop(output_ptr, input_ptr, p.is_padding_positive_width);
        data_index_step(c, channels, od, output_depth, oh, output_height);
      }
    });
  } else {
    TORCH_INTERNAL_ASSERT(false, "expect input dim to be 1d, 2d or 3d.");
  }

  if (!output_.is_contiguous()) {
    output_.copy_(output);
  }
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free