Commit

Updates
jainapurva committed Feb 25, 2025
1 parent a56f3ab commit 24bea42
Showing 29 changed files with 74 additions and 293 deletions.
2 changes: 1 addition & 1 deletion benchmarks/microbenchmarks/benchmark_config.yml
@@ -14,6 +14,6 @@ model_params:
[4096, 4096, 1024]
]
precision: "torch.bfloat16"
compile: false
compile: "max-autotune"
device: "cuda" # Change this to "cuda", "mps", "xpu", or "cpu" as needed
model_type: "linear"
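
The only functional change in this file is the compile setting, which moves from false to the torch.compile mode string "max-autotune". As a rough, non-authoritative illustration (not part of the commit) of how such a value is consumed, mirroring the torch.compile call visible in benchmark_inference.py below, with a placeholder model:

# Illustration only: feeding a compile-mode string from the YAML into torch.compile.
# The model is a placeholder; benchmark_inference.py builds the real one from the config.
import torch

model = torch.nn.Linear(4096, 4096).eval()
compile_mode = "max-autotune"  # value taken from benchmark_config.yml

if compile_mode:  # a YAML value of false (or an empty string) skips compilation
    model = torch.compile(model, mode=compile_mode, fullgraph=True)

With false, the if-branch never runs, which is why the old config effectively disabled compilation.
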
25 changes: 10 additions & 15 deletions benchmarks/microbenchmarks/benchmark_inference.py
@@ -4,27 +4,28 @@
This script runs inference benchmarks and generates a micro-benchmarking report for it.
- run() function is the main entry point for running inference benchmarks.
"""

from copy import deepcopy
import json
from pathlib import Path
from typing import Dict

import torch
from utils import (
BenchmarkConfig,
benchmark_model_inference_in_microseconds,
clean_caches,
create_model_and_input,
quantize_model,
BenchmarkConfig,
)


def run(config: BenchmarkConfig) -> Dict[str, float]:
"""Run inference benchmarks"""
clean_caches() # Clean caches

# Create output directory if it doesn't exist
Path(config.output_dir).mkdir(parents=True, exist_ok=True)

base_model, input_data = create_model_and_input(
config.model_type,
config.m,
@@ -33,10 +34,7 @@ def run(config: BenchmarkConfig) -> Dict[str, float]:
dtype=config.precision,
device=config.device,
)
print(
f"Starting benchmarking for model: {base_model.__class__.__name__} for quantization: {config.quantization}"
)


# Use quantize_ to apply each quantization function to the model
m_copy = deepcopy(base_model).eval().to(config.device)
m_copy = quantize_model(m_copy, config.quantization)
@@ -46,16 +44,13 @@ def run(config: BenchmarkConfig) -> Dict[str, float]:
m_copy = torch.compile(m_copy, mode=config.compile, fullgraph=True)

# Run benchmarks
results = {}
result = {**config.__dict__}

# Benchmark time to run an inference call for quantized model
model_time = benchmark_model_inference_in_microseconds(
model=m_copy, input_data=input_data
)
results[f"benchmark_model_inference_in_microseconds"] = model_time
print(
f"Time to run a {base_model.__class__.__name__}: {model_time:.2f} microseconds quantized with {config.quantization}"
)
result["benchmark_model_inference_in_microseconds"] = model_time

# TODO: Benchmark time using profiler
# Profile dtype model evaluation
@@ -68,4 +63,4 @@ def run(config: BenchmarkConfig) -> Dict[str, float]:
# TODO: Benchmark op with cuda graph
# time = benchmark_op_with_cuda_graph(op, args)

return results
return result
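
The timing helper benchmark_model_inference_in_microseconds comes from utils, which is not part of this diff. A minimal sketch of what such a helper could look like, assuming a warmup phase and CUDA synchronization around a perf_counter loop (an assumption, not the repository's actual implementation):

# Hypothetical sketch of a microsecond-level inference timer; the real
# utils.benchmark_model_inference_in_microseconds may differ.
import time
import torch

def benchmark_model_inference_in_microseconds(model, input_data, warmup=5, iters=20):
    with torch.no_grad():
        for _ in range(warmup):  # absorb one-off costs such as compilation
            model(input_data)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(iters):
            model(input_data)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - start
    return elapsed / iters * 1e6  # average microseconds per forward pass

Averaging over several timed iterations after a warmup keeps one-off costs (for example the first max-autotune compilation) out of the reported number.
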
32 changes: 17 additions & 15 deletions benchmarks/microbenchmarks/benchmark_runner.py
@@ -13,13 +13,13 @@
The YAML file should contain all necessary configuration parameters for the benchmarks.
"""

from itertools import product
from typing import Any, Dict, List, Tuple

import torch
import yaml
from utils import BenchmarkConfig, generate_results_csv

from utils import BenchmarkConfig

def get_shapes_for_config(shape_config: Dict[str, Any]) -> List[Tuple[str, List[int]]]:
"""Get shapes for a given configuration"""
@@ -43,13 +43,16 @@ def load_benchmark_configs(config_path: str) -> List[BenchmarkConfig]:
shapes = get_shapes_for_config(shape_config)
# Generate combinations for each shape
for quant, (shape_name, shape) in product(quantizations, shapes):
configs.append(BenchmarkConfig(
quantization=quant,
params=params,
shape_name=shape_name,
shape=shape,
output_dir=output_dir,
))
configs.append(
BenchmarkConfig(
quantization=quant,
params=params,
shape_name=shape_name,
shape=shape,
output_dir=output_dir,
)
)
print("Configs: ", configs[0].__dict__)

return configs

@@ -60,22 +63,21 @@ def run_benchmarks_from_config(config_path: str) -> None:

configs = load_benchmark_configs(config_path)
results = []
print(f"Benchmarking Inference ......")
print("Benchmarking Inference ......")
for config in configs:
print(f"Running: {config.name}")
result = run_inference(config) # Pass the config object directly
results.append(result)

# TODO: Convert results to csv
# Speedups:
# Add results to csv
generate_results_csv(results, configs[0].output_dir)

# TODO: Process results: Speedups:
# 1. For different shapes for same model and quantization
# 2. For different quantizations for same model and shape
# 3. For different models for same quantization





if __name__ == "__main__":
import argparse

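generate_results_csv, now imported from utils at the top of this file, replaces the old "TODO: Convert results to csv". Its implementation is not shown in this diff; a hypothetical sketch matching the call generate_results_csv(results, configs[0].output_dir), where each result is the flat dict returned by benchmark_inference.run, might be:

# Hypothetical sketch of a results-to-CSV helper; the real utils.generate_results_csv
# in this repository may differ.
import csv
import os

def generate_results_csv(results, output_dir, file_name="results.csv"):
    if not results:
        return
    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, file_name)
    # Union of keys across all result dicts, so differing configs still fit one header
    fieldnames = sorted({key for result in results for key in result})
    with open(out_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

DictWriter fills missing keys with an empty string, so rows produced by configs with different fields still line up under one header.
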
4 changes: 3 additions & 1 deletion benchmarks/microbenchmarks/benchmark_training.py
@@ -10,4 +10,6 @@

def run(config: BenchmarkConfig) -> None:
"""Run training benchmarks"""
raise NotImplementedError("Training benchmarks are not implemented yet. This is a placeholder function.")
raise NotImplementedError(
"Training benchmarks are not implemented yet. This is a placeholder function."
)

24 deleted files (contents not shown).
