speedup_experiment() — pytorch Function Reference
Architecture documentation for the speedup_experiment() function in common.py from the pytorch codebase.
Entity Profile
Dependency Diagram
graph TD 04a3a4a6_8db3_854d_a893_02c9542bf9dd["speedup_experiment()"] 19aa986b_7c81_6518_3e84_45819cd8e90d["speedup_experiment_fx2trt()"] 19aa986b_7c81_6518_3e84_45819cd8e90d -->|calls| 04a3a4a6_8db3_854d_a893_02c9542bf9dd eeda7f55_d302_b33d_bc04_70a1b44397fa["overhead_experiment()"] eeda7f55_d302_b33d_bc04_70a1b44397fa -->|calls| 04a3a4a6_8db3_854d_a893_02c9542bf9dd d89eb84e_a6ed_8b3f_56e7_7c49cca94d00["export_aot_inductor()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| d89eb84e_a6ed_8b3f_56e7_7c49cca94d00 529640b9_a20a_d7f3_29f8_900581eb0f3d["export_nativert()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| 529640b9_a20a_d7f3_29f8_900581eb0f3d 9b71719a_7134_fa38_fd95_b665398662db["torchscript_jit_trace()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| 9b71719a_7134_fa38_fd95_b665398662db 5130216c_70e0_8051_b662_1aed31c12627["aot_precompile()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| 5130216c_70e0_8051_b662_1aed31c12627 c9be2096_e6d7_2374_ad2e_a6e33f435ada["run()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| c9be2096_e6d7_2374_ad2e_a6e33f435ada d0c96460_b5ec_95d1_765a_084b5860c03d["randomize_input()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| d0c96460_b5ec_95d1_765a_084b5860c03d cea445e5_003e_b07a_0de9_43b0801fe53a["maybe_mark_step()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| cea445e5_003e_b07a_0de9_43b0801fe53a 9c8df7bf_0e05_9bbb_5e2f_6c88f28b52d4["timed()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| 9c8df7bf_0e05_9bbb_5e2f_6c88f28b52d4 09313e6d_4bd1_587f_c45b_b58695ffc25a["trace_handler()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| 09313e6d_4bd1_587f_c45b_b58695ffc25a 3473d1a5_c1f5_fc97_006e_79a1d3081bef["write_outputs()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| 3473d1a5_c1f5_fc97_006e_79a1d3081bef b8cdd827_b831_469a_75e3_9eb4a7bb1874["output_signpost()"] 04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| b8cdd827_b831_469a_75e3_9eb4a7bb1874 cae7ee8e_ad93_67b5_9bdd_648c2e822459["get_suite_from_model_iter_fn()"] 
04a3a4a6_8db3_854d_a893_02c9542bf9dd -->|calls| cae7ee8e_ad93_67b5_9bdd_648c2e822459 style 04a3a4a6_8db3_854d_a893_02c9542bf9dd fill:#6366f1,stroke:#818cf8,color:#fff
Relationship Graph
Source Code
benchmarks/dynamo/common.py lines 1039–1217
def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
    """
    Measure speedups of an optimized/compiled model over eager execution.

    For each of ``args.repeat`` repetitions, times one eager run and one
    optimized run on the same inputs (interleaved, so frequency scaling and
    machine load affect both fairly), takes the per-column median, and reports
    ``eager_median / optimized_median`` as the speedup.

    Writes to ./speedups.csv (via the module-level ``output_filename``), plus
    a ``*_compilation_metrics.csv`` companion file, and emits a signpost event.

    Args:
        args: parsed benchmark CLI namespace (repeat, randomize_input,
            iterations_per_run, export flags, profiler options, ...).
        model_iter_fn: callable that runs one iteration of ``model`` on inputs.
        model: the model under test.
        example_inputs: inputs passed to ``model_iter_fn`` (possibly
            randomized per repetition when ``args.randomize_input`` is set).
        **kwargs: optional extras folded into the CSV row — e.g. ``tag``,
            ``compilation_latency``, ``compression_ratio``, ``dynamo_stats`` —
            and control flags such as ``hf_llm`` and ``batch_size``.

    Returns:
        A short human-readable message string, e.g. ``"1.234x"`` (extended
        with baseline comparison when ``args.baseline`` is given).
    """
    # Column 0 = eager timing, column 1 = optimized timing, one row per repeat.
    timings = np.zeros((args.repeat, 2), np.float64)
    # if we randomize the input, we should also check the result is correct
    should_randomize_input = args.randomize_input

    import contextlib

    from torch._inductor.utils import maybe_profile

    @contextlib.contextmanager
    def maybe_mark_profile(*args, **kwargs):
        # Wrap the body in a profiler record_function region labeled ``mark``
        # only when an active profiler object ``p`` was passed; otherwise a
        # no-op context so call sites stay uniform.
        prof: torch.profiler.profile = kwargs.pop("p", None)
        mark = kwargs.pop("mark", None)
        if prof:
            with torch.profiler.record_function(mark):
                yield
        else:
            yield

    # Number of iterations timed per call to ``timed`` (per repetition).
    times = args.iterations_per_run

    # Use higher tolerance for XLA since XLA causes numerical instability when
    # graph size changes
    tolerance = args.xla_tolerance if args.trace_on_xla else 1e-4
    torch._dynamo.config.repro_tolerance = tolerance

    with maybe_profile(args.export_profiler_trace, **args.profile_details) as p:
        # Pick the "optimized" callable to benchmark against eager. Each
        # export path produces a frozen iteration function; the default path
        # reuses artifacts from a prior compile via torch._dynamo.run.
        if args.export_aot_inductor:
            frozen_model_iter_fn = export_aot_inductor(
                model, example_inputs, args.inductor_compile_mode
            )
        elif args.export_nativert:
            frozen_model_iter_fn = export_nativert(model, example_inputs)
        elif args.torchscript_jit_trace:
            frozen_model_iter_fn = torchscript_jit_trace(model, example_inputs)
        elif args.aot_precompile:
            frozen_model_iter_fn = aot_precompile(model, example_inputs)
        else:
            if kwargs["hf_llm"]:
                # If it's an llm, we want to optimize model.forward, and use
                # the generate function
                model.forward = torch._dynamo.run(model)
                frozen_model_iter_fn = model_iter_fn
            else:
                frozen_model_iter_fn = torch._dynamo.run(model_iter_fn)

        for rep in trange(args.repeat, desc="running benchmark"):
            inputs = (
                randomize_input(copy.deepcopy(example_inputs))
                if should_randomize_input
                else example_inputs
            )
            # need call mark_step to perform the computation
            # on randomize_input. Otherwise the first call using the
            # inputs will incur high penalty then the next one.
            maybe_mark_step(args)

            # interleave the runs to handle frequency scaling and load changes
            # force_eager ensures the baseline run is genuinely uncompiled.
            with (
                maybe_mark_profile(p=p, mark="expected"),
                torch.compiler.set_stance("force_eager"),
            ):
                timings[rep, 0], expected_output = timed(
                    model,
                    model_iter_fn,
                    inputs,
                    return_result=True,
                    times=times,
                    collect_outputs=args.collect_outputs,
                    batch_size=kwargs.get("batch_size"),
                )

            # call mark_step between the 2 calls to make the comparison fair.
            maybe_mark_step(args)

            with maybe_mark_profile(p=p, mark="actual"):
                timings[rep, 1], actual_output = timed(
                    model,
                    frozen_model_iter_fn,
                    inputs,
                    return_result=True,
                    times=times,
                    collect_outputs=args.collect_outputs,
                )

    if args.export_profiler_trace:
        # Name the trace after the model (and distributed rank, if any); hand
        # it to perfdoctor when requested, otherwise dump a chrome trace JSON.
        name = args.profiler_trace_name + "_" + model.name
        if hasattr(args, "rank"):
            name += f"_rank_{args.rank}"
        if args.export_perfdoctor and trace_handler:
            trace_handler(name, p)
        else:
            name += ".json"
            name = os.path.join(torch._dynamo.config.base_dir, name)
            p.export_chrome_trace(name)

    # Median across repetitions is robust to occasional slow outlier runs.
    median = np.median(timings, axis=0)
    speedup = median[0] / median[1]
    if args.dump_raw_metrics:
        np.save(
            f"{output_filename[:-4]}-raw_timings-{current_name}-{current_device}.npy",
            timings,
        )

    # Build the CSV row: identifying columns first, then metrics. Optional
    # columns are appended in matching header/row pairs below.
    first_headers = ["dev", "name", "batch_size"]
    first_fields = [current_device, current_name, current_batch_size]
    if "tag" in kwargs:
        first_headers.append("tag")
        first_fields.append(kwargs["tag"])
    headers = first_headers + ["speedup", "abs_latency"]
    # abs_latency is the optimized median converted to milliseconds.
    row = first_fields + [float(speedup), median[1] * 1000]
    msg = f"{speedup:.3f}x"
    if args.baseline:
        headers.extend(
            [
                "baseline",
                "speedup_vs_baseline",
            ]
        )
        df = pd.read_csv(args.baseline)
        try:
            baseline_speedup = df[df["name"] == current_name]["speedup"].item()
            row.extend([baseline_speedup, speedup / baseline_speedup])
            msg = f"{baseline_speedup:.3f}x -> {speedup:.3f}x [{speedup / baseline_speedup:.3f}x]"
        except (KeyError, ZeroDivisionError):
            # Model missing from the baseline CSV (or zero baseline): record
            # placeholder zeros rather than aborting the whole run.
            row.extend(
                [
                    0.0,
                    0.0,
                ]
            )
    if "compilation_latency" in kwargs:
        headers += [
            "compilation_latency",
            "compression_ratio",
            "eager_peak_mem",
            "dynamo_peak_mem",
        ]
        row.append(kwargs["compilation_latency"])
        row.append(kwargs["compression_ratio"])
        row.append(kwargs["eager_peak_mem"])
        row.append(kwargs["dynamo_peak_mem"])
    if "cache_lookup_latency" in kwargs:
        headers.append("cache_lookup_latency")
        row.append(kwargs["cache_lookup_latency"])
    if "dynamo_stats" in kwargs:
        for k, v in kwargs["dynamo_stats"].items():
            headers.append(k)
            row.append(v)
    write_outputs(
        output_filename,
        headers,
        row,
    )

    # Companion CSV with aggregated dynamo compile-time stats for this model.
    c_headers, c_data = torch._dynamo.utils.compile_times(repr="csv", aggregate=True)
    if output_filename.find(".csv") <= 0:
        raise AssertionError(
            f"expected output_filename to be a .csv, but got {output_filename}"
        )
    write_outputs(
        output_filename[:-4] + "_compilation_metrics.csv",
        first_headers + c_headers,
        first_fields + c_data,
    )

    output_signpost(
        dict(zip(headers, row)),
        args,
        get_suite_from_model_iter_fn(model_iter_fn),
    )

    return msg
Domain
Subdomains
Calls
Source
Frequently Asked Questions
What does speedup_experiment() do?
speedup_experiment() is a benchmarking function in the pytorch codebase. It times a compiled/optimized model against eager execution over repeated interleaved runs, computes the median speedup and absolute latency, writes the results to ./speedups.csv (plus a compilation-metrics CSV), and returns a short summary message such as "1.234x".
What does speedup_experiment() call?
speedup_experiment() calls 12 function(s): aot_precompile, export_aot_inductor, export_nativert, get_suite_from_model_iter_fn, maybe_mark_step, output_signpost, randomize_input, run, and 4 more.
What calls speedup_experiment()?
speedup_experiment() is called by 2 function(s): overhead_experiment, speedup_experiment_fx2trt.
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free