
Commit 2017cd9
committed Mar 3, 2025
Update for 0.25.0 release
Tags: 0.31.0, 0.25.0
1 parent: 25090b0

File tree: 177 files changed, +6646 -1950 lines


.pre-commit-config.yaml

Lines changed: 33 additions & 21 deletions
@@ -6,42 +6,43 @@ exclude: >
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
-      - id: trailing-whitespace
-      - id: mixed-line-ending
-        args: [--fix=lf]
-      - id: end-of-file-fixer
-      - id: check-merge-conflict
-      - id: requirements-txt-fixer
-      - id: debug-statements
-      - id: check-json
-        exclude: ^.vscode/.*.json  # vscode files can take comments
-      - id: check-yaml
-        args: [--allow-multiple-documents]
-      - id: check-toml
       - id: check-added-large-files
         args: [--maxkb=500, --enforce-all]
         exclude: >
           (?x)^(
             examples/diffusers/quantization/assets/.*.png|
             examples/diffusers/cache_diffusion/assets/.*.png|
           )$
+      - id: check-json
+        exclude: ^.vscode/.*.json  # vscode files can take comments
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-toml
+      - id: check-yaml
+        args: [--allow-multiple-documents]
+      - id: debug-statements
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: [--fix=lf]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
 
   - repo: https://github.com/executablebooks/mdformat
-    rev: 0.7.17
+    rev: 0.7.21
    hooks:
       - id: mdformat
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.6.4
+    rev: v0.9.4
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
       - id: ruff-format
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.11.2
+    rev: v1.14.1
     hooks:
       - id: mypy
 
@@ -88,25 +89,27 @@ repos:
           (?x)^(
             modelopt/onnx/quantization/operators.py|
             modelopt/onnx/quantization/ort_patching.py|
+            modelopt/torch/_deploy/utils/onnx_utils.py|
             modelopt/torch/export/transformer_engine.py|
             modelopt/torch/quantization/export_onnx.py|
             modelopt/torch/quantization/plugins/attention.py|
-            modelopt/torch/speculative/plugins/transformers.py|
             modelopt/torch/speculative/eagle/utils.py|
-            modelopt/torch/_deploy/utils/onnx_utils.py|
+            modelopt/torch/speculative/plugins/transformers.py|
             examples/chained_optimizations/bert_prune_distill_quantize.py|
-            examples/diffusers/quantization/onnx_utils/export.py|
             examples/diffusers/cache_diffusion/pipeline/models/sdxl.py|
+            examples/diffusers/quantization/onnx_utils/export.py|
             examples/llm_eval/gen_model_answer.py|
             examples/llm_eval/humaneval.py|
             examples/llm_eval/lm_eval_hf.py|
             examples/llm_eval/mmlu.py|
             examples/llm_eval/modeling.py|
-            examples/llm_sparsity/finetune.py|
             examples/llm_qat/main.py|
+            examples/llm_sparsity/finetune.py|
             examples/speculative_decoding/main.py|
             examples/speculative_decoding/medusa_utils.py|
             examples/speculative_decoding/vllm_generate.py|
+            examples/deepseek/quantize_to_nvfp4.py|
+            examples/deepseek/ptq.py|
           )$
 
 # Default hook for Apache 2.0 in core c/c++/cuda files
@@ -132,7 +135,7 @@ repos:
         types_or: [shell]
 
   - repo: https://github.com/keith/pre-commit-buildifier
-    rev: 6.4.0
+    rev: 8.0.1
     hooks:
       - id: buildifier
       - id: buildifier-lint
@@ -143,3 +146,12 @@ repos:
       - id: bandit
         args: ["-c", "pyproject.toml", "-q"]
         additional_dependencies: ["bandit[toml]"]
+
+  # Link checker
+  - repo: https://github.com/lycheeverse/lychee.git
+    rev: v0.15.1
+    hooks:
+      - id: lychee
+        args: ["--no-progress", "--exclude-loopback"]
+        stages: [manual]  # Only run with `pre-commit run --all-files --hook-stage manual lychee`
+        exclude: internal/
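For reference, here is the newly added lychee hook assembled as it would appear in the final `.pre-commit-config.yaml` (same `rev`, `args`, and `exclude` as in the commit; the hook is gated to the manual stage, so it only runs when invoked as `pre-commit run --all-files --hook-stage manual lychee`):

```yaml
repos:
  # Link checker, skipped during normal commits; run it on demand with
  # `pre-commit run --all-files --hook-stage manual lychee`
  - repo: https://github.com/lycheeverse/lychee.git
    rev: v0.15.1
    hooks:
      - id: lychee
        args: ["--no-progress", "--exclude-loopback"]
        stages: [manual]
        exclude: internal/
```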

CHANGELOG.rst

Lines changed: 22 additions & 0 deletions
@@ -1,6 +1,28 @@
 Model Optimizer Changelog (Linux)
 =================================
 
+0.25 (2025-03-03)
+^^^^^^^^^^^^^^^^^
+
+**Backward Breaking Changes**
+
+- Deprecate Torch 2.1 support.
+- Deprecate ``humaneval`` benchmark in ``llm_eval`` examples. Please use the newly added ``simple_eval`` instead.
+- Deprecate ``fp8_naive`` quantization format in ``llm_ptq`` examples. Please use ``fp8`` instead.
+
+**New Features**
+
+- Support fast Hadamard transform in :class:`TensorQuantizer <modelopt.torch.quantization.nn.modules.TensorQuantizer>`.
+  It can be used for rotation-based quantization methods, e.g. QuaRot. Users need to install the `fast_hadamard_transform <https://github.com/Dao-AILab/fast-hadamard-transform>`_ package to use this feature.
+- Add affine quantization support for the KV cache, resolving the low accuracy issue in models such as Qwen2.5 and Phi-3/3.5.
+- Add FSDP2 support. FSDP2 can now be used for QAT.
+- Add `LiveCodeBench <https://livecodebench.github.io/>`_ and `Simple Evals <https://github.com/openai/simple-evals>`_ to the ``llm_eval`` examples.
+- Disable saving the modelopt state in the unified HF export APIs by default, i.e., add a ``save_modelopt_state`` flag to the ``export_hf_checkpoint`` API that defaults to False.
+- Add FP8 and NVFP4 real quantization support with an LLM QLoRA example.
+- :class:`modelopt.deploy.llm.LLM` now supports the :class:`tensorrt_llm._torch.LLM` backend for quantized HuggingFace checkpoints.
+- Add an `NVFP4 PTQ example for DeepSeek-R1 <https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/deepseek>`_.
+- Add an end-to-end `AutoDeploy example for AutoQuant LLM models <https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/llm_autodeploy>`_.
+
 0.23 (2025-01-29)
 ^^^^^^^^^^^^^^^^^
 
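The fast Hadamard transform mentioned in the 0.25 changelog underpins rotation-based quantization methods such as QuaRot: weights and activations are multiplied by an orthogonal Hadamard matrix, which spreads per-channel outliers across channels before quantization. A minimal NumPy sketch of the transform itself (an illustration only, not ModelOpt's implementation, which relies on the CUDA `fast_hadamard_transform` kernel):

```python
import numpy as np

def fwht(x: np.ndarray) -> np.ndarray:
    """Fast Walsh-Hadamard transform along the last axis, O(n log n).

    The length n must be a power of two. Unnormalized, the transform is an
    involution up to a factor of n: fwht(fwht(x)) == n * x.
    """
    x = np.array(x, dtype=np.float64)  # work on a copy
    n = x.shape[-1]
    assert n > 0 and n & (n - 1) == 0, "length must be a power of two"
    flat = x.reshape(-1, n)
    h = 1
    while h < n:
        for i in range(0, n, 2 * h):
            a = flat[:, i : i + h].copy()
            b = flat[:, i + h : i + 2 * h].copy()
            flat[:, i : i + h] = a + b          # butterfly: sums
            flat[:, i + h : i + 2 * h] = a - b  # butterfly: differences
        h *= 2
    return flat.reshape(x.shape)

# Dividing by sqrt(n) makes the transform orthogonal: the rotation preserves
# the norm while flattening outliers, which is what helps quantization.
w = np.array([10.0, 0.1, 0.1, 0.1])   # one large outlier channel
w_rot = fwht(w) / np.sqrt(w.size)     # rotated weights, same norm, smaller max
```

The quantizer then operates on `w_rot`, whose dynamic range is much tighter than that of `w`; the inverse rotation is the same transform again (scaled by `1/sqrt(n)`).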

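The "affine quantization support for the KV cache" item in the 0.25 changelog refers to asymmetric quantization, which adds a zero-point so that a skewed value distribution maps onto the full integer grid instead of wasting half the range. A generic sketch of the idea (hypothetical helper names, not ModelOpt's API):

```python
import numpy as np

def affine_quantize(x: np.ndarray, num_bits: int = 8):
    """Asymmetric (affine) quantization: q = round(x / scale) + zero_point.

    Unlike symmetric quantization (zero_point fixed at the grid center),
    the zero_point lets a skewed range, e.g. strictly non-negative
    KV-cache values, use all 2**num_bits levels.
    """
    qmin, qmax = 0, 2**num_bits - 1
    xmin, xmax = float(x.min()), float(x.max())
    scale = (xmax - xmin) / (qmax - qmin) or 1.0  # guard constant inputs
    zero_point = int(round(qmin - xmin / scale))
    q = np.clip(np.round(x / scale) + zero_point, qmin, qmax).astype(np.int32)
    return q, scale, zero_point

def affine_dequantize(q: np.ndarray, scale: float, zero_point: int) -> np.ndarray:
    return (q.astype(np.float64) - zero_point) * scale

kv = np.array([0.0, 0.5, 1.0, 7.5])   # skewed, non-negative cache values
q, s, z = affine_quantize(kv)
kv_hat = affine_dequantize(q, s, z)   # reconstruction error bounded by scale/2
```

With a symmetric scheme the negative half of the integer range would go unused for data like this; the affine zero-point reclaims it, which is the accuracy fix the changelog describes for models such as Qwen2.5 and Phi-3/3.5.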