HuggingfaceRunner Class — PyTorch Architecture
Architecture documentation for the HuggingfaceRunner class in huggingface.py from the PyTorch codebase.
Entity Profile
Relationship Graph
Source Code
benchmarks/dynamo/huggingface.py lines 338–569
class HuggingfaceRunner(BenchmarkRunner):
    """Benchmark runner for the HuggingFace ("huggingface") model suite.

    Model selection, skip lists, batch-size divisors, and accuracy
    tolerances are all driven by ``huggingface.yaml`` (loaded via the
    ``_config`` property).
    """

    def __init__(self):
        super().__init__()
        # Suite label used by the shared BenchmarkRunner infrastructure.
        self.suite_name = "huggingface"
@property
def _config(self):
    # Full suite configuration (skips, tolerances, batch-size divisors).
    # NOTE(review): re-read on every access — assumed cheap or cached inside
    # load_yaml_file; confirm before relying on it in a hot path.
    return load_yaml_file("huggingface.yaml")

@property
def _skip(self):
    # Skip-list section of the YAML config.
    return self._config["skip"]

@property
def _accuracy(self):
    # Accuracy-check section of the YAML config.
    return self._config["accuracy"]

@property
def skip_models(self):
    # Models skipped unconditionally (every device/mode).
    return self._skip["all"]

@property
def skip_models_for_cpu(self):
    # Models skipped only when running on CPU.
    return self._skip["device"]["cpu"]

@property
def fp32_only_models(self):
    # Models run only in float32 (YAML "only_fp32" list).
    return self._config["only_fp32"]

@property
def skip_models_due_to_control_flow(self):
    # Models skipped per the YAML "control_flow" skip list.
    return self._skip["control_flow"]
def use_larger_multiplier_for_smaller_tensor(self, name):
    """Return True when *name* needs a looser multiplier for small tensors.

    Currently only GPT2ForSequenceClassification is on this list.
    """
    return name == "GPT2ForSequenceClassification"
def _get_model_cls_and_config(self, model_name):
    """Resolve ``(model_cls, config)`` for *model_name*.

    Names listed in EXTRA_MODELS carry a pre-built config; any other name
    is resolved through the transformers class registry and gets a default
    config instance.
    """
    # Fast path: curated models ship their own config.
    if model_name in EXTRA_MODELS:
        config, model_cls = EXTRA_MODELS[model_name]
        return model_cls, config

    model_cls = get_module_cls_by_model_name(model_name)
    config = model_cls.config_class()
    # NB: some models need a pad token defined to handle BS > 1
    needs_pad_token = model_cls in (
        GPT2ForSequenceClassification,
        GPTNeoForSequenceClassification,
        GPTJForSequenceClassification,
    ) or model_cls.__name__.startswith(("Roberta", "Marian"))
    if needs_pad_token:
        config.pad_token_id = 0
    return model_cls, config
@download_retry_decorator
def _download_model(self, model_name):
    """Instantiate *model_name* from its config, retrying flaky downloads.

    Auto* classes must be built through their ``from_config`` factory;
    concrete model classes are constructed directly.
    """
    model_cls, config = self._get_model_cls_and_config(model_name)
    if "auto" not in model_cls.__module__:
        return model_cls(config)
    # Handle auto classes
    return model_cls.from_config(config)
def load_model(
    self,
    device,
    model_name,
    batch_size=None,
    extra_args=None,
):
    """Build *model_name* plus example inputs on *device*.

    Returns ``(device, model_name, model, example_inputs, batch_size)``.
    When ``batch_size`` is None, the per-model known size (or 16) is used
    and then scaled down by any per-model divisor from the YAML config.
    ``extra_args`` is accepted for interface compatibility but unused here.
    """
    is_training = self.args.training
    use_eval_mode = self.args.use_eval_mode
    dtype = torch.float32
    reset_rng_state()

    # Get batch size
    if model_name in BATCH_SIZE_KNOWN_MODELS:
        batch_size_default = BATCH_SIZE_KNOWN_MODELS[model_name]
    elif batch_size is None:
        batch_size_default = 16
        log.info(
            f"Batch size not specified for {model_name}. Setting batch_size=16"  # noqa: G004
        )
    if batch_size is None:
        batch_size = batch_size_default
        # Scale down models that can't run at the default size (never below 1).
        batch_size_divisors = self._config["batch_size"]["divisors"]
        if model_name in batch_size_divisors:
            batch_size = max(int(batch_size / batch_size_divisors[model_name]), 1)
            log.info(
                f"Running smaller batch size={batch_size} for {model_name}, orig batch_size={batch_size_default}"  # noqa: G004
            )

    # Get model and example inputs
    if model_name in HF_LLM_MODELS:
        benchmark_cls = HF_LLM_MODELS[model_name]
        model, example_inputs = benchmark_cls.get_model_and_inputs(
            model_name, device
        )

        # Set this flag so that when we test for speedup, we use
        # model.generate instead of using model.forward
        self.hf_llm = True

        def generate(self, _, example_inputs, collect_outputs=True):
            # Closes over `model`; the ignored positional arg is the
            # (possibly compiled) module passed by the harness.
            return model.generate(**example_inputs)

        self.generate = types.MethodType(generate, self)
    else:
        self.hf_llm = False

        model_cls, config = self._get_model_cls_and_config(model_name)
        model = self._download_model(model_name)
        model = model.to(device, dtype=dtype)
        example_inputs = generate_inputs_for_model(
            model_cls, model, model_name, batch_size, device, include_loss_args=True
        )

        # So we can check for correct gradients without eliminating the dropout computation
        for attr in dir(config):
            if "drop" in attr and isinstance(getattr(config, attr), float):
                setattr(config, attr, 1e-30)

    # Turning off kv cache for torchbench models. This is not the right
    # thing to do, but the pt2 dashboard is outdated. Real transformers
    # benchmarks will be added soon using a different infra.
    if hasattr(model, "config") and hasattr(model.config, "use_cache"):
        model.config.use_cache = False

    if self.args.enable_activation_checkpointing:
        model.gradient_checkpointing_enable()

    # Train mode unless eval was forced, or the model is marked
    # inference-only for accuracy runs (YAML "only_inference" list).
    if (
        is_training
        and not use_eval_mode
        and not (
            self.args.accuracy and model_name in self._config["only_inference"]
        )
    ):
        model.train()
    else:
        model.eval()

    self.validate_model(model, example_inputs)
    return device, model_name, model, example_inputs, batch_size
def iter_model_names(self, args):
    """Yield model names for this shard, honoring filter/exclude/skip rules.

    Names come from the union of the known-batch-size table and
    EXTRA_MODELS; only indices in [start, end) from get_benchmark_indices
    are considered.
    """
    model_names = sorted(set(BATCH_SIZE_KNOWN_MODELS) | set(EXTRA_MODELS))
    start, end = self.get_benchmark_indices(len(model_names))
    # Hoist loop-invariant pattern construction out of the per-model loop;
    # the original rebuilt both joined patterns on every iteration.
    filter_pattern = "|".join(args.filter)
    exclude_pattern = "|".join(args.exclude)
    for index, model_name in enumerate(model_names):
        if index < start or index >= end:
            continue
        if (
            not re.search(filter_pattern, model_name, re.IGNORECASE)
            or re.search(exclude_pattern, model_name, re.IGNORECASE)
            or model_name in args.exclude_exact
            or model_name in self.skip_models
        ):
            continue
        yield model_name
@property
def skip_accuracy_checks_large_models_dashboard(self):
    """Large-model accuracy skip set; empty outside dashboard/accuracy runs."""
    wants_skip_list = self.args.dashboard or self.args.accuracy
    if not wants_skip_list:
        return set()
    return self._accuracy["skip"]["large_models"]

@property
def get_output_amp_train_process_func(self):
    """No per-model AMP-training output post-processing for this suite."""
    return {}
def pick_grad(self, name, is_training):
    """Gradient context for a run: enable grads for training, disable otherwise."""
    return torch.enable_grad() if is_training else torch.no_grad()
def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
    """Return ``(tolerance, use_cosine)`` for accuracy comparison of *name*.

    Tolerances come from the YAML "tolerance" lists; ``use_cosine`` simply
    forwards the --cosine CLI flag.
    """
    cosine = self.args.cosine
    if is_training:
        from torch._inductor import config as inductor_config

        # Training is looser overall: 2e-2 for models on the higher-training
        # list (or the max-autotune-specific list when max_autotune is on),
        # else 1e-2.
        if (name in self._config["tolerance"]["higher_training"]) or (
            inductor_config.max_autotune
            and name in self._config["tolerance"]["higher_max_autotune_training"]
        ):
            return 2e-2, cosine
        else:
            return 1e-2, cosine
    else:
        # Inference: the CPU-specific override is checked first and takes
        # precedence over the generic higher_inference list.
        if (
            current_device == "cpu"
            and name in self._config["tolerance"]["higher_inference_cpu"]
        ):
            return 5e-3, cosine
        if name in self._config["tolerance"]["higher_inference"]:
            return 4e-3, cosine
        return 1e-3, cosine
def compute_loss(self, pred):
    """Extract the loss: the first element of the model's output."""
    loss = pred[0]
    return loss
def forward_pass(self, mod, inputs, collect_outputs=True):
    """Run one forward pass under the runner's autocast context."""
    with self.autocast(**self.autocast_arg):
        output = mod(**inputs)
        # LLM speedup runs compare logits only; others return the raw output.
        if self.hf_llm:
            return output.logits
        return output
def forward_and_backward_pass(self, mod, inputs, collect_outputs=True):
    """One training step: forward, loss, scaled backward, optimizer step.

    Inputs are cloned first so the step cannot mutate the caller's tensors;
    backward goes through grad_scaler to support AMP loss scaling. Returns
    collected results for accuracy comparison, or None when
    ``collect_outputs`` is False (speedup runs).
    """
    cloned_inputs = clone_inputs(inputs)
    self.optimizer_zero_grad(mod)
    # Only the forward/loss computation runs under autocast; backward and
    # the optimizer step happen outside the context.
    with self.autocast(**self.autocast_arg):
        pred = mod(**cloned_inputs)
        loss = self.compute_loss(pred)
    self.grad_scaler.scale(loss).backward()
    self.optimizer_step()
    if collect_outputs:
        return collect_results(mod, None, loss, cloned_inputs)
    return None
Domain
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free