SoftmaxBackward Class — PyTorch Architecture
Architecture documentation for the SoftmaxBackward class in benchmarks/dynamo/genai_layers/kernels.py from the PyTorch codebase. The class benchmarks the softmax backward pass across four backends: eager, torch.compile, Quack, and Liger.
Source Code
benchmarks/dynamo/genai_layers/kernels.py lines 282–353
# Module-level imports used by this excerpt. The listing starts at line 282 of
# kernels.py, so these, and the BenchmarkKernel base class, are assumed to be
# defined or imported earlier in the file:
from typing import Any

import torch
import torch.nn.functional as F

import cutlass
import cutlass.torch as cutlass_torch


class SoftmaxBackward(BenchmarkKernel):
    def __init__(self, script_args):
        super().__init__(script_args)
        self.available_backends = ["eager", "compiled", "quack", "liger"]

    def get_shapes(self) -> tuple[tuple[int, ...], ...]:
        return (
            (32768, 256),
            (32768, 512),
            (32768, 1024),
            (32768, 2048),
            (32768, 4096),
            (32768, 8192),
            (32768, 16384),
            (32768, 32768),
            (32768, 65536),
            (16384, 131072),
            (8192, 262144),
        )
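    # The row count stays at 32768 through N = 65536; the last three shapes
    # halve M as N doubles, so each of them holds exactly 2**31 elements.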
    def get_memory_bytes(self, args, kwargs) -> int:
        # Memory traffic of the backward pass: read y and dy, write dx.
        x, dy = args
        M, N = x.shape
        return 3 * M * N * x.dtype.itemsize
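    # The analytic softmax backward,
    #   dx = y * (dy - (y * dy).sum(dim=-1, keepdim=True)),
    # touches exactly those three M x N tensors, which is where the
    # 3 * M * N * itemsize figure comes from.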
    def eager(self, args, kwargs=None) -> Any:
        if kwargs is not None:
            raise AssertionError(f"Expected kwargs to be None, but got {kwargs}")
        x, dy = args
        y = F.softmax(x, dim=-1)
        return lambda: torch.autograd.grad(y, x, grad_outputs=dy, retain_graph=True)

    def compiled(self, args, kwargs=None) -> Any:
        if kwargs is not None:
            raise AssertionError(f"Expected kwargs to be None, but got {kwargs}")
        x, dy = args
        compiled_softmax = torch.compile(
            lambda x: F.softmax(x, dim=-1), mode=self.compile_mode, fullgraph=True
        )
        y = compiled_softmax(x)
        return lambda: torch.autograd.grad(y, x, grad_outputs=dy, retain_graph=True)

    def quack(self, args, kwargs=None) -> Any:
        from quack.softmax import softmax

        if kwargs is not None:
            raise AssertionError(f"Expected kwargs to be None, but got {kwargs}")
        x, dy = args
        y = softmax(x)
        return lambda: torch.autograd.grad(y, x, grad_outputs=dy, retain_graph=True)

    def liger(self, args, kwargs=None) -> Any:
        from liger_kernel.transformers.softmax import LigerSoftmax

        if kwargs is not None:
            raise AssertionError(f"Expected kwargs to be None, but got {kwargs}")
        x, dy = args
        softmax = LigerSoftmax().to("cuda")
        y = softmax(x)
        return lambda: torch.autograd.grad(y, x, grad_outputs=dy, retain_graph=True)
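    # Each backend runs the forward softmax once during setup and returns a
    # zero-argument closure, so the harness times only the backward pass;
    # retain_graph=True lets the closure be invoked repeatedly over the same
    # autograd graph.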
    def benchmark(self):
        for M, N in self.get_shapes():
            print(f"Tensor dimensions: [{M}, {N}]")
            torch_dtype = cutlass_torch.dtype(cutlass.BFloat16)
            x = 0.1 * torch.randn(
                M, N, device="cuda", dtype=torch_dtype, requires_grad=True
            )
            dy = torch.randn(M, N, device="cuda", dtype=torch_dtype)
            self.benchmark_single_shape((x, dy), setting=f"shape: [{M}, {N}]")
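The get_memory_bytes model can be sanity-checked against autograd with plain PyTorch. The sketch below is illustrative and not part of kernels.py; it assumes a CUDA device and uses the analytic gradient noted above:

import torch
import torch.nn.functional as F

M, N = 1024, 2048
x = 0.1 * torch.randn(M, N, device="cuda", dtype=torch.bfloat16, requires_grad=True)
dy = torch.randn(M, N, device="cuda", dtype=torch.bfloat16)

# Forward once, then take the backward that the benchmark closures time.
y = F.softmax(x, dim=-1)
(dx_autograd,) = torch.autograd.grad(y, x, grad_outputs=dy)

# Analytic softmax backward: reads y and dy, writes dx -- three M*N tensors.
dx_manual = y * (dy - (y * dy).sum(dim=-1, keepdim=True))
torch.testing.assert_close(dx_autograd, dx_manual, rtol=1e-2, atol=1e-2)

# At the largest square shape, (32768, 65536) in bf16, that traffic is
# 3 * 32768 * 65536 * 2 bytes = 12 GiB per backward pass.
print(3 * 32768 * 65536 * 2 / 2**30, "GiB")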