accumulateInGmem Function — PyTorch Architecture
Architecture documentation for the accumulateInGmem function template (return type void) in kernel_backward.h from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h lines 2454–2496
// Writes (or accumulates) a matmul accumulator fragment out to global memory
// through a CUTLASS pipelined epilogue.
//
// epilogue_smem  shared-memory staging buffer for the epilogue
// accum          per-thread accumulator fragment produced by MatmulT::Mma
// output_it      tile iterator over the destination in global memory; passed
//                both as the epilogue's source and destination, so on
//                non-first passes the existing gmem contents are read back
//                and added to accum
// first          true on the first accumulation pass: output is overwritten
//                instead of read-modify-written
// warp_id/lane_id  position of this thread within the threadblock
template <typename MatmulT>
static CUTLASS_DEVICE void accumulateInGmem(
    typename MatmulT::DefaultEpilogue::SharedStorage& epilogue_smem,
    typename MatmulT::Mma::FragmentC const& accum,
    typename MatmulT::OutputTileIterator output_it,
    bool first,
    uint8_t warp_id,
    uint8_t lane_id) {
  using DefaultEpilogue = typename MatmulT::DefaultEpilogue;
  using DefaultOutputOp = typename MatmulT::DefaultOutputOp;
  using Mma = typename MatmulT::Mma;
  // Linear thread index within the threadblock (32 = threads per warp).
  int thread_id = 32 * warp_id + lane_id;
  // DISPATCH_BOOL turns the runtime flag `first` into the compile-time
  // constant kIsFirst so ScaleType can be selected as a template argument.
  DISPATCH_BOOL(
      first, kIsFirst, ([&]() {
        // First pass: ScaleType::Nothing -> plain store, no source read.
        // Later passes: NoBetaScaling -> D = alpha * accum + source, i.e.
        // accumulate onto the existing output with an implicit beta of 1
        // (alpha is set to 1 via `rescale` below).
        static constexpr auto ScaleType = kIsFirst
            ? cutlass::epilogue::thread::ScaleType::Nothing
            : cutlass::epilogue::thread::ScaleType::NoBetaScaling;
        // Rebuild the default LinearCombination output op with the
        // pass-dependent ScaleType; all other parameters are inherited
        // from DefaultOutputOp.
        using EpilogueOutputOp =
            typename cutlass::epilogue::thread::LinearCombination<
                typename DefaultOutputOp::ElementOutput,
                DefaultOutputOp::kCount,
                typename DefaultOutputOp::ElementAccumulator,
                typename DefaultOutputOp::ElementCompute,
                ScaleType>;
        // Pipelined epilogue mirroring DefaultEpilogue, but using the
        // custom output op above and this kernel's OutputTileIterator.
        using Epilogue =
            typename cutlass::epilogue::threadblock::EpiloguePipelined<
                typename DefaultEpilogue::Shape,
                typename Mma::Operator,
                DefaultEpilogue::kPartitionsK,
                typename MatmulT::OutputTileIterator,
                typename DefaultEpilogue::AccumulatorFragmentIterator,
                typename DefaultEpilogue::WarpTileIterator,
                typename DefaultEpilogue::SharedLoadIterator,
                EpilogueOutputOp,
                typename DefaultEpilogue::Padding,
                DefaultEpilogue::kFragmentsPerIteration,
                true // IterationsUnroll
                >;
        // alpha = 1, beta = 1 (beta is ignored when ScaleType::Nothing).
        EpilogueOutputOp rescale({1, 1});
        Epilogue epilogue(epilogue_smem, thread_id, warp_id, lane_id);
        // Signature: (output_op, destination_it, accumulators, source_it).
        epilogue(rescale, output_it, accum, output_it);
      }));
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free