Home / Class / HuggingfaceRunner Class — PyTorch Architecture

HuggingfaceRunner Class — PyTorch Architecture

Architecture documentation for the HuggingfaceRunner class in huggingface.py from the PyTorch codebase.

Entity Profile

Relationship Graph

Source Code

benchmarks/dynamo/huggingface.py lines 338–569

class HuggingfaceRunner(BenchmarkRunner):
    """Benchmark runner for the HuggingFace model suite.

    Resolves model names to (class, config) pairs — either via transformers
    lookup or the EXTRA_MODELS registry — builds models and example inputs,
    and supplies per-model skip lists, batch-size divisors, and accuracy
    tolerances read from huggingface.yaml.
    """

    def __init__(self):
        super().__init__()
        # Suite identifier consumed by the shared BenchmarkRunner machinery.
        self.suite_name = "huggingface"

    @property
    def _config(self):
        # Parsed huggingface.yaml (skip lists, tolerances, batch-size
        # divisors, ...). NOTE(review): evaluated on every access — assumes
        # load_yaml_file caches the parse; confirm upstream.
        return load_yaml_file("huggingface.yaml")

    @property
    def _skip(self):
        # "skip" section of the YAML config.
        return self._config["skip"]

    @property
    def _accuracy(self):
        # "accuracy" section of the YAML config.
        return self._config["accuracy"]

    @property
    def skip_models(self):
        # Models skipped unconditionally, on every device.
        return self._skip["all"]

    @property
    def skip_models_for_cpu(self):
        # Models skipped only when benchmarking on CPU.
        return self._skip["device"]["cpu"]

    @property
    def fp32_only_models(self):
        # Models that must run in float32 only.
        return self._config["only_fp32"]

    @property
    def skip_models_due_to_control_flow(self):
        # Models skipped because of (per the config key) control flow.
        return self._skip["control_flow"]

    def use_larger_multiplier_for_smaller_tensor(self, name):
        """Return True for models whose small tensors need a looser
        comparison multiplier (currently only GPT2ForSequenceClassification)."""
        return name in [
            "GPT2ForSequenceClassification",
        ]

    def _get_model_cls_and_config(self, model_name):
        """Resolve `model_name` to a (model class, config instance) pair.

        Names absent from EXTRA_MODELS are looked up with
        get_module_cls_by_model_name and use the class's default config;
        EXTRA_MODELS entries carry a pre-built (config, class) pair.
        """
        if model_name not in EXTRA_MODELS:
            model_cls = get_module_cls_by_model_name(model_name)
            config_cls = model_cls.config_class
            config = config_cls()

            # NB: some models need a pad token defined to handle BS > 1
            if (
                model_cls
                in [
                    GPT2ForSequenceClassification,
                    GPTNeoForSequenceClassification,
                    GPTJForSequenceClassification,
                ]
                or model_cls.__name__.startswith("Roberta")
                or model_cls.__name__.startswith("Marian")
            ):
                config.pad_token_id = 0

        else:
            # EXTRA_MODELS stores (config, class), note the reversed order.
            config, model_cls = EXTRA_MODELS[model_name]

        return model_cls, config

    @download_retry_decorator
    def _download_model(self, model_name):
        """Instantiate `model_name` from its config.

        Despite the name, this builds the model from a config object rather
        than loading pretrained weights here; the retry decorator suggests
        network access can still occur inside construction — TODO confirm.
        """
        model_cls, config = self._get_model_cls_and_config(model_name)
        if "auto" in model_cls.__module__:
            # Handle auto classes
            model = model_cls.from_config(config)
        else:
            model = model_cls(config)
        return model

    def load_model(
        self,
        device,
        model_name,
        batch_size=None,
        extra_args=None,
    ):
        """Build `model_name` on `device` with example inputs.

        Returns (device, model_name, model, example_inputs, batch_size).
        Batch-size precedence: an explicit `batch_size` argument wins;
        otherwise the known per-model default (or 16), optionally reduced
        by a per-model divisor from the config. `extra_args` is accepted
        for interface compatibility but unused here.
        """
        is_training = self.args.training
        use_eval_mode = self.args.use_eval_mode
        dtype = torch.float32
        reset_rng_state()

        # Get batch size
        # batch_size_default is only read below when batch_size is None, in
        # which case exactly one of these branches has assigned it.
        if model_name in BATCH_SIZE_KNOWN_MODELS:
            batch_size_default = BATCH_SIZE_KNOWN_MODELS[model_name]
        elif batch_size is None:
            batch_size_default = 16
            log.info(
                f"Batch size not specified for {model_name}. Setting batch_size=16"  # noqa: G004
            )

        if batch_size is None:
            batch_size = batch_size_default
            # Some models OOM or are too slow at the default size; the config
            # lists per-model divisors to shrink them (floored at 1).
            batch_size_divisors = self._config["batch_size"]["divisors"]
            if model_name in batch_size_divisors:
                batch_size = max(int(batch_size / batch_size_divisors[model_name]), 1)
                log.info(
                    f"Running smaller batch size={batch_size} for {model_name}, orig batch_size={batch_size_default}"  # noqa: G004
                )

        # Get model and example inputs
        if model_name in HF_LLM_MODELS:
            # LLM-style models come from a dedicated benchmark class that
            # supplies both the model and its inputs.
            benchmark_cls = HF_LLM_MODELS[model_name]
            model, example_inputs = benchmark_cls.get_model_and_inputs(
                model_name, device
            )

            # Set this flag so that when we test for speedup, we use
            # model.generate instead of using model.forward
            self.hf_llm = True

            def generate(self, _, example_inputs, collect_outputs=True):
                # Closure over `model`; bound onto this runner instance below.
                return model.generate(**example_inputs)

            self.generate = types.MethodType(generate, self)

        else:
            self.hf_llm = False

            model_cls, config = self._get_model_cls_and_config(model_name)
            model = self._download_model(model_name)
            model = model.to(device, dtype=dtype)

            example_inputs = generate_inputs_for_model(
                model_cls, model, model_name, batch_size, device, include_loss_args=True
            )

            # So we can check for correct gradients without eliminating the dropout computation
            # (1e-30 keeps dropout in the graph while making it a no-op).
            for attr in dir(config):
                if "drop" in attr and isinstance(getattr(config, attr), float):
                    setattr(config, attr, 1e-30)

            # Turning off kv cache for torchbench models. This is not the right
            # thing to do, but the pt2 dashboard is outdated. Real transformers
            # benchmarks will be added soon using a different infra.
            if hasattr(model, "config") and hasattr(model.config, "use_cache"):
                model.config.use_cache = False

        if self.args.enable_activation_checkpointing:
            model.gradient_checkpointing_enable()

        # Train mode only when training was requested, eval mode was not
        # forced, and the model is not listed as inference-only for accuracy.
        if (
            is_training
            and not use_eval_mode
            and not (
                self.args.accuracy and model_name in self._config["only_inference"]
            )
        ):
            model.train()
        else:
            model.eval()

        self.validate_model(model, example_inputs)
        return device, model_name, model, example_inputs, batch_size

    def iter_model_names(self, args):
        """Yield benchmarkable model names in sorted order.

        Combines the known-batch-size and EXTRA_MODELS registries, restricts
        to this shard's [start, end) index window, then applies the
        filter/exclude regexes, exact excludes, and the global skip list.
        """
        model_names = list(BATCH_SIZE_KNOWN_MODELS.keys()) + list(EXTRA_MODELS.keys())
        model_names = set(model_names)  # de-duplicate before sorting
        model_names = sorted(model_names)

        start, end = self.get_benchmark_indices(len(model_names))
        for index, model_name in enumerate(model_names):
            if index < start or index >= end:
                continue
            if (
                not re.search("|".join(args.filter), model_name, re.IGNORECASE)
                or re.search("|".join(args.exclude), model_name, re.IGNORECASE)
                or model_name in args.exclude_exact
                or model_name in self.skip_models
            ):
                continue
            yield model_name

    @property
    def skip_accuracy_checks_large_models_dashboard(self):
        # Large models are exempt from accuracy checks only in dashboard or
        # accuracy runs; otherwise nothing is skipped.
        if self.args.dashboard or self.args.accuracy:
            return self._accuracy["skip"]["large_models"]
        return set()

    @property
    def get_output_amp_train_process_func(self):
        # No per-model AMP-training output post-processing for this suite.
        return {}

    def pick_grad(self, name, is_training):
        """Return the grad-mode context manager appropriate for the run."""
        if is_training:
            return torch.enable_grad()
        else:
            return torch.no_grad()

    def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
        """Return (tolerance, use_cosine_similarity) for accuracy comparison.

        Training defaults to 1e-2 (2e-2 for models the config lists as
        needing higher tolerance, including under max-autotune); inference
        defaults to 1e-3 with config-driven 4e-3 / 5e-3 (CPU) escalations.
        """
        cosine = self.args.cosine
        if is_training:
            from torch._inductor import config as inductor_config

            if (name in self._config["tolerance"]["higher_training"]) or (
                inductor_config.max_autotune
                and name in self._config["tolerance"]["higher_max_autotune_training"]
            ):
                return 2e-2, cosine
            else:
                return 1e-2, cosine
        else:
            if (
                current_device == "cpu"
                and name in self._config["tolerance"]["higher_inference_cpu"]
            ):
                return 5e-3, cosine
            if name in self._config["tolerance"]["higher_inference"]:
                return 4e-3, cosine
        return 1e-3, cosine

    def compute_loss(self, pred):
        # HF models called with include_loss_args return the loss as the
        # first element of the output.
        return pred[0]

    def forward_pass(self, mod, inputs, collect_outputs=True):
        """Run one autocast forward pass; LLMs compare logits only."""
        with self.autocast(**self.autocast_arg):
            res = mod(**inputs)
        return res.logits if self.hf_llm else res

    def forward_and_backward_pass(self, mod, inputs, collect_outputs=True):
        """Run one training step: forward, loss, scaled backward, optimizer.

        Inputs are cloned so the caller's tensors are not mutated by the
        step. Returns collected results (or None when collect_outputs is
        False, e.g. during warmup/timing iterations).
        """
        cloned_inputs = clone_inputs(inputs)
        self.optimizer_zero_grad(mod)
        with self.autocast(**self.autocast_arg):
            pred = mod(**cloned_inputs)
            loss = self.compute_loss(pred)
        self.grad_scaler.scale(loss).backward()
        self.optimizer_step()
        if collect_outputs:
            return collect_results(mod, None, loss, cloned_inputs)
        return None

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free