_vec_softmax_lastdim Function — pytorch Architecture
Architecture documentation for the _vec_softmax_lastdim function template (SFINAE-constrained via std::is_same_v on the opmath type) in SoftMaxKernel.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/SoftMaxKernel.cpp lines 95–159
// Vectorized softmax over the last (contiguous) dimension, specialized for
// reduced-precision scalar types: this overload is enabled only when scalar_t
// differs from at::opmath_type<scalar_t> (e.g. BFloat16/Half — TODO confirm
// the instantiating types against the callers), so all intermediate math is
// accumulated in float for accuracy.
//
// Treats the data as [outer_size, dim_size] contiguous rows and normalizes
// each row independently. Per row, three passes over a thread-local float
// scratch buffer:
//   1. convert the input to float, cache it, and reduce the row maximum
//   2. compute exp(x - max) in place and reduce the row sum
//   3. scale by 1/sum and convert back to scalar_t into the output
template<typename scalar_t>
inline typename std::enable_if_t<!std::is_same_v<scalar_t, at::opmath_type<scalar_t>>, void>
_vec_softmax_lastdim(
const scalar_t* input_data_base,
scalar_t* output_data_base,
int64_t outer_size,
int64_t dim_size) {
// One Vec of scalar_t converts into two fVec of float (see the
// convert_to_float call below), so Vec holds twice as many lanes as fVec.
using Vec = vec::Vectorized<scalar_t>;
using fVec = vec::Vectorized<float>;
// See Note: grain_size value of 0
parallel_for(0, outer_size, 0, [&](int64_t begin, int64_t end) {
// Thread-local scratch buffer holding one row converted to float; reused
// across all rows handled by this thread.
auto buffer = std::make_unique<float []>(dim_size);
float* buffer_data = buffer.get();
for (const auto i : c10::irange(begin, end)) {
const scalar_t* input_data = input_data_base + i * dim_size;
scalar_t* output_data = output_data_base + i * dim_size;
// Pass 1: reduce to max and cache the float-converted input in the buffer.
// Strides by Vec::size(); each iteration stores two fVec-wide chunks.
fVec max_fvec = fVec(-std::numeric_limits<float>::infinity());
int64_t d0 = 0;
for (; d0 < dim_size - (dim_size % Vec::size()); d0 += Vec::size()) {
Vec data_vec = Vec::loadu(input_data + d0);
auto [data_fvec0, data_fvec1] = vec::convert_to_float<scalar_t>(data_vec);
max_fvec = vec::maximum(max_fvec, data_fvec0);
max_fvec = vec::maximum(max_fvec, data_fvec1);
data_fvec0.store(buffer_data + d0);
data_fvec1.store(buffer_data + d0 + fVec::size());
}
// Horizontal reduction of the per-lane maxima to a single scalar, then a
// scalar tail loop for the remaining (dim_size % Vec::size()) elements.
float max_val = vec::vec_reduce_all([](fVec& x, fVec& y) { return vec::maximum(x, y); }, max_fvec);
for (; d0 < dim_size; d0++) {
float data_val = input_data[d0];
max_val = std::max(max_val, data_val);
buffer_data[d0] = data_val;
}
// Pass 2: map (x - max).exp() in place and reduce to sum. Subtracting the
// max keeps exp() from overflowing. Operates on the float buffer, so this
// pass strides by fVec::size() (not Vec::size()).
fVec sum_fvec = fVec(float(0));
int64_t d1 = 0;
for (; d1 < dim_size - (dim_size % fVec::size()); d1 += fVec::size()) {
fVec data_fvec = (fVec::loadu(buffer_data + d1) - fVec(max_val)).exp();
sum_fvec += data_fvec;
data_fvec.store(buffer_data + d1);
}
// Horizontal sum reduction, then the scalar tail.
float sum_val = vec::vec_reduce_all([](fVec& x, fVec& y) { return x + y; }, sum_fvec);
for (; d1 < dim_size; d1++) {
float data_val = std::exp(buffer_data[d1] - max_val);
sum_val += data_val;
buffer_data[d1] = data_val;
}
// Invert once so pass 3 multiplies instead of dividing per element.
sum_val = 1 / sum_val;
// Pass 3: scale by 1/sum and convert two float chunks back into one Vec of
// scalar_t per iteration, storing to the output row.
int64_t d2 = 0;
for (; d2 < dim_size - (dim_size % Vec::size()); d2 += Vec::size()) {
fVec out_fvec0 = fVec::loadu(buffer_data + d2) * fVec(sum_val);
fVec out_fvec1 = fVec::loadu(buffer_data + d2 + fVec::size()) * fVec(sum_val);
Vec out_vec = vec::convert_from_float<scalar_t>(out_fvec0, out_fvec1);
out_vec.store(output_data + d2);
}
// Scalar tail: scale and narrow the remaining elements one at a time.
for (; d2 < dim_size; d2++) {
output_data[d2] = scalar_t(buffer_data[d2] * sum_val);
}
}
});
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free