map2_reduce_all Function Template — PyTorch Architecture
Architecture documentation for the map2_reduce_all function template in functional_bfloat16.h from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/cpu/vec/functional_bfloat16.h lines 288–353
// Applies `map_fun` lane-wise to two reduced-precision input arrays (after
// widening them to float) and folds every mapped value into one float using
// `red_fun`.
//
//   map_fun  callable invoked as map_fun(fVec, fVec); its result is stored
//            back into an fVec.
//   red_fun  callable invoked as red_fun(fVec, fVec); its result is stored
//            back into an fVec. It is also handed to vec_reduce_all for the
//            final collapse of the vector accumulator.
//   data     first input array; `size` elements are read.
//   data2    second input array; `size` elements are read.
//   size     number of elements taken from each array.
//
// convert_to_float yields a pair of float vectors for each loaded
// Vectorized<scalar_t>; the first of the pair is treated as the leading
// fVec::size() elements and the second as the ones that follow.
// NOTE(review): fVec::set(a, b, n) is presumed to take the first n lanes from
// b and the remaining lanes from a -- confirm against Vectorized.
template <
    typename scalar_t,
    typename MapOp,
    typename ReduceOp,
    typename std::enable_if_t<is_reduced_floating_point_v<scalar_t>, int> = 0>
inline float map2_reduce_all(
    const MapOp& map_fun,
    const ReduceOp& red_fun,
    const scalar_t* data,
    const scalar_t* data2,
    int64_t size) {
  using bVec = vec::Vectorized<scalar_t>;
  using fVec = vec::Vectorized<float>;
  const int64_t packed_lanes = bVec::size();
  const int64_t float_lanes = fVec::size();

  // Input shorter than one packed vector: a single partial load per array.
  if (size < packed_lanes) {
    bVec lhs_packed = bVec::loadu(data, size);
    auto [lhs_lo, lhs_hi] = convert_to_float<scalar_t>(lhs_packed);
    bVec rhs_packed = bVec::loadu(data2, size);
    auto [rhs_lo, rhs_hi] = convert_to_float<scalar_t>(rhs_packed);

    lhs_lo = map_fun(lhs_lo, rhs_lo);
    if (size <= float_lanes) {
      // Everything fits in the first float vector; reduce `size` lanes of it.
      return vec_reduce_all<float>(red_fun, lhs_lo, size);
    }
    // The second float vector holds size - float_lanes elements; merge only
    // that many lanes into the first vector, then reduce the full vector.
    lhs_hi = map_fun(lhs_hi, rhs_hi);
    lhs_lo = fVec::set(lhs_lo, red_fun(lhs_lo, lhs_hi), size - float_lanes);
    return vec_reduce_all<float>(red_fun, lhs_lo, float_lanes);
  }

  // Seed the two accumulators from the first full packed vector.
  bVec head_a = bVec::loadu(data);
  auto [acc_lo, acc_hi] = convert_to_float<scalar_t>(head_a);
  bVec head_b = bVec::loadu(data2);
  auto [head_b_lo, head_b_hi] = convert_to_float<scalar_t>(head_b);
  acc_lo = map_fun(acc_lo, head_b_lo);
  acc_hi = map_fun(acc_hi, head_b_hi);

  // Consume the remaining full packed vectors.
  const int64_t full_end = size - (size % packed_lanes);
  int64_t d = packed_lanes;
  while (d < full_end) {
    bVec lhs_packed = bVec::loadu(data + d);
    auto [lhs_lo, lhs_hi] = convert_to_float<scalar_t>(lhs_packed);
    bVec rhs_packed = bVec::loadu(data2 + d);
    auto [rhs_lo, rhs_hi] = convert_to_float<scalar_t>(rhs_packed);
    lhs_lo = map_fun(lhs_lo, rhs_lo);
    lhs_hi = map_fun(lhs_hi, rhs_hi);
    acc_lo = red_fun(acc_lo, lhs_lo);
    acc_hi = red_fun(acc_hi, lhs_hi);
    d += packed_lanes;
  }

  // Leftover elements (fewer than one packed vector).
  const int64_t tail = size - d;
  if (tail > 0) {
    bVec lhs_packed = bVec::loadu(data + d, tail);
    auto [lhs_lo, lhs_hi] = convert_to_float<scalar_t>(lhs_packed);
    bVec rhs_packed = bVec::loadu(data2 + d, tail);
    auto [rhs_lo, rhs_hi] = convert_to_float<scalar_t>(rhs_packed);

    lhs_lo = map_fun(lhs_lo, rhs_lo);
    if (tail > float_lanes) {
      // First float vector is full; only tail - float_lanes lanes of the
      // second one are folded into its accumulator.
      lhs_hi = map_fun(lhs_hi, rhs_hi);
      acc_lo = red_fun(acc_lo, lhs_lo);
      acc_hi = fVec::set(acc_hi, red_fun(acc_hi, lhs_hi), tail - float_lanes);
    } else {
      // Only `tail` lanes of the first float vector are folded in.
      acc_lo = fVec::set(acc_lo, red_fun(acc_lo, lhs_lo), tail);
    }
  }

  // Combine the two accumulators, then collapse the vector to a scalar.
  acc_lo = red_fun(acc_lo, acc_hi);
  return vec_reduce_all<float>(red_fun, acc_lo);
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free