LayerNormForward Class — PyTorch Architecture
Architecture documentation for the LayerNormForward class in kernels.py from the PyTorch codebase.
Entity Profile
Relationship Graph
Source Code
benchmarks/dynamo/genai_layers/kernels.py lines 571–644
class LayerNormForward(BenchmarkKernel):
    """Benchmark the forward pass of bias-free LayerNorm across backends.

    Compares eager PyTorch, torch.compile, quack, and liger implementations
    on bf16 inputs of shape [M, N] with an fp32 weight of shape [N].
    """

    def __init__(self, script_args):
        super().__init__(script_args)
        # Backends exposed to the benchmark driver; each name matches a
        # method on this class that returns a zero-arg callable.
        self.available_backends = ["eager", "compiled", "quack", "liger"]

    def get_shapes(self) -> tuple[tuple[int, ...], ...]:
        """Return the (M, N) tensor shapes to benchmark.

        (16384, 131072) is deliberately excluded: it OOMs on H100.
        """
        return (
            (32768, 256),
            (32768, 512),
            (32768, 1024),
            (32768, 2048),
            (32768, 4096),
            (32768, 8192),
            (32768, 16384),
            (32768, 32768),
            (32768, 65536),
        ) + extra_shapes_for_norm

    def get_memory_bytes(self, args, kwargs) -> int:
        """Bytes moved per call: read x ([M, N]) and w ([N]), write y ([M, N])."""
        x, w = args
        M, N = x.shape
        return 2 * M * N * x.dtype.itemsize + N * w.dtype.itemsize

    def layernorm_ref(self, x: torch.Tensor, w: torch.Tensor, eps: float = 1e-6):
        """Reference layernorm: compute in fp32, no bias, cast back to x.dtype."""
        x_f32 = x.float()
        return F.layer_norm(x_f32, w.shape, w, None, eps).to(x.dtype)

    def eager(self, args, kwargs=None) -> Any:
        """Return a callable running the eager-mode reference implementation."""
        if kwargs is not None:
            raise AssertionError(f"Expected kwargs to be None, but got {kwargs}")
        x, w = args
        return lambda: self.layernorm_ref(x, w)

    def compiled(self, args, kwargs=None) -> Any:
        """Return a callable running the torch.compile'd reference implementation."""
        if kwargs is not None:
            raise AssertionError(f"Expected kwargs to be None, but got {kwargs}")
        x, w = args
        # Mark batch size as dynamic for realistic workload
        torch._dynamo.mark_dynamic(x, 0)
        compiled_layernorm = torch.compile(
            self.layernorm_ref, mode=self.compile_mode, fullgraph=True
        )
        return lambda: compiled_layernorm(x, w, eps=1e-6)

    def quack(self, args, kwargs=None) -> Any:
        """Return a callable running the quack layernorm kernel.

        kwargs is accepted for signature consistency with the other backends
        and ignored. Note: quack layernorm does not support bias.
        """
        from quack.layernorm import layernorm

        x, w = args
        return lambda: layernorm(x, w, eps=1e-6)

    def liger(self, args, kwargs=None) -> Any:
        """Return a callable running the liger layernorm module.

        kwargs is accepted for signature consistency with the other backends
        and ignored.
        """
        from liger_kernel.transformers.layer_norm import LigerLayerNorm

        x, w = args
        M, N = x.shape
        liger_layernorm = LigerLayerNorm(hidden_size=N, eps=1e-6).cuda()
        liger_layernorm.weight.data.copy_(w)
        # Zero the bias in place to match the bias-free reference; avoids
        # allocating a temporary zeros tensor just to copy it in.
        liger_layernorm.bias.data.zero_()
        return lambda: liger_layernorm(x)

    def benchmark(self):
        """Run the benchmark over all shapes with bf16 x and fp32 w on CUDA."""
        # Loop-invariant: resolve the bf16 torch dtype once, not per shape.
        torch_dtype = cutlass_torch.dtype(cutlass.BFloat16)
        for M, N in self.get_shapes():
            print(f"Tensor dimensions: [{M}, {N}]")
            x = torch.randn(M, N, device="cuda", dtype=torch_dtype)
            w = torch.randn(N, device="cuda", dtype=torch.float32)
            self.benchmark_single_shape((x, w), setting=f"shape: [{M}, {N}]")
Domain
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free