Commit

Updates
jainapurva committed Feb 25, 2025
1 parent a56f3ab commit 24bea42
Showing 29 changed files with 74 additions and 293 deletions.
2 changes: 1 addition & 1 deletion benchmarks/microbenchmarks/benchmark_config.yml
@@ -14,6 +14,6 @@ model_params:
[4096, 4096, 1024]
]
precision: "torch.bfloat16"
compile: false
compile: "max-autotune"
device: "cuda" # Change this to "cuda", "mps", "xpu", or "cpu" as needed
model_type: "linear"
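
The only functional change in this file is the compile setting, which moves from false to the torch.compile mode string "max-autotune". As a rough, non-authoritative illustration (not part of the commit) of how such a value is consumed, mirroring the torch.compile call visible in benchmark_inference.py below, with a placeholder model:

# Illustration only: feeding a compile-mode string from the YAML into torch.compile.
# The model is a placeholder; benchmark_inference.py builds the real one from the config.
import torch

model = torch.nn.Linear(4096, 4096).eval()
compile_mode = "max-autotune"  # value taken from benchmark_config.yml

if compile_mode:  # a YAML value of false (or an empty string) skips compilation
    model = torch.compile(model, mode=compile_mode, fullgraph=True)

With false, the if-branch never runs, which is why the old config effectively disabled compilation.
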
25 changes: 10 additions & 15 deletions benchmarks/microbenchmarks/benchmark_inference.py
@@ -4,27 +4,28 @@
This script runs inference benchmarks and generates a micro-benchmarking report for it.
- run() function is the main entry point for running inference benchmarks.
"""

from copy import deepcopy
import json
from pathlib import Path
from typing import Dict

import torch
from utils import (
BenchmarkConfig,
benchmark_model_inference_in_microseconds,
clean_caches,
create_model_and_input,
quantize_model,
BenchmarkConfig,
)


def run(config: BenchmarkConfig) -> Dict[str, float]:
"""Run inference benchmarks"""
clean_caches() # Clean caches

# Create output directory if it doesn't exist
Path(config.output_dir).mkdir(parents=True, exist_ok=True)

base_model, input_data = create_model_and_input(
config.model_type,
config.m,
@@ -33,10 +34,7 @@ def run(config: BenchmarkConfig) -> Dict[str, float]:
dtype=config.precision,
device=config.device,
)
print(
f"Starting benchmarking for model: {base_model.__class__.__name__} for quantization: {config.quantization}"
)


# Use quantize_ to apply each quantization function to the model
m_copy = deepcopy(base_model).eval().to(config.device)
m_copy = quantize_model(m_copy, config.quantization)
@@ -46,16 +44,13 @@ def run(config: BenchmarkConfig) -> Dict[str, float]:
m_copy = torch.compile(m_copy, mode=config.compile, fullgraph=True)

# Run benchmarks
results = {}
result = {**config.__dict__}

# Benchmark time to run an inference call for quantized model
model_time = benchmark_model_inference_in_microseconds(
model=m_copy, input_data=input_data
)
results[f"benchmark_model_inference_in_microseconds"] = model_time
print(
f"Time to run a {base_model.__class__.__name__}: {model_time:.2f} microseconds quantized with {config.quantization}"
)
result["benchmark_model_inference_in_microseconds"] = model_time

# TODO: Benchmark time using profiler
# Profile dtype model evaluation
@@ -68,4 +63,4 @@ def run(config: BenchmarkConfig) -> Dict[str, float]:
# TODO: Benchmark op with cuda graph
# time = benchmark_op_with_cuda_graph(op, args)

return results
return result
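
The timing helper benchmark_model_inference_in_microseconds comes from utils, which is not part of this diff. A minimal sketch of what such a helper could look like, assuming a warmup phase and CUDA synchronization around a perf_counter loop (an assumption, not the repository's actual implementation):

# Hypothetical sketch of a microsecond-level inference timer; the real
# utils.benchmark_model_inference_in_microseconds may differ.
import time
import torch

def benchmark_model_inference_in_microseconds(model, input_data, warmup=5, iters=20):
    with torch.no_grad():
        for _ in range(warmup):  # absorb one-off costs such as compilation
            model(input_data)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(iters):
            model(input_data)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - start
    return elapsed / iters * 1e6  # average microseconds per forward pass

Averaging over several timed iterations after a warmup keeps one-off costs (for example the first max-autotune compilation) out of the reported number.
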
32 changes: 17 additions & 15 deletions benchmarks/microbenchmarks/benchmark_runner.py
@@ -13,13 +13,13 @@
The YAML file should contain all necessary configuration parameters for the benchmarks.
"""

from itertools import product
from typing import Any, Dict, List, Tuple

import torch
import yaml
from utils import BenchmarkConfig, generate_results_csv

from utils import BenchmarkConfig

def get_shapes_for_config(shape_config: Dict[str, Any]) -> List[Tuple[str, List[int]]]:
"""Get shapes for a given configuration"""
@@ -43,13 +43,16 @@ def load_benchmark_configs(config_path: str) -> List[BenchmarkConfig]:
shapes = get_shapes_for_config(shape_config)
# Generate combinations for each shape
for quant, (shape_name, shape) in product(quantizations, shapes):
configs.append(BenchmarkConfig(
quantization=quant,
params=params,
shape_name=shape_name,
shape=shape,
output_dir=output_dir,
))
configs.append(
BenchmarkConfig(
quantization=quant,
params=params,
shape_name=shape_name,
shape=shape,
output_dir=output_dir,
)
)
print("Configs: ", configs[0].__dict__)

return configs

@@ -60,22 +63,21 @@ def run_benchmarks_from_config(config_path: str) -> None:

configs = load_benchmark_configs(config_path)
results = []
print(f"Benchmarking Inference ......")
print("Benchmarking Inference ......")
for config in configs:
print(f"Running: {config.name}")
result = run_inference(config) # Pass the config object directly
results.append(result)

# TODO: Convert results to csv
# Speedups:
# Add results to csv
generate_results_csv(results, configs[0].output_dir)

# TODO: Process results: Speedups:
# 1. For different shapes for same model and quantization
# 2. For different quantizations for same model and shape
# 3. For different models for same quantization





if __name__ == "__main__":
import argparse

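generate_results_csv, now imported from utils at the top of this file, replaces the old "TODO: Convert results to csv". Its implementation is not shown in this diff; a hypothetical sketch matching the call generate_results_csv(results, configs[0].output_dir), where each result is the flat dict returned by benchmark_inference.run, might be:

# Hypothetical sketch of a results-to-CSV helper; the real utils.generate_results_csv
# in this repository may differ.
import csv
import os

def generate_results_csv(results, output_dir, file_name="results.csv"):
    if not results:
        return
    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, file_name)
    # Union of keys across all result dicts, so differing configs still fit one header
    fieldnames = sorted({key for result in results for key in result})
    with open(out_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

DictWriter fills missing keys with an empty string, so rows produced by configs with different fields still line up under one header.
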
4 changes: 3 additions & 1 deletion benchmarks/microbenchmarks/benchmark_training.py
@@ -10,4 +10,6 @@

def run(config: BenchmarkConfig) -> None:
"""Run training benchmarks"""
raise NotImplementedError("Training benchmarks are not implemented yet. This is a placeholder function.")
raise NotImplementedError(
"Training benchmarks are not implemented yet. This is a placeholder function."
)

24 deleted files (contents not shown).
