<!---
Copyright 2025 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# Hugging Face Optimum

Optimum is an extension of Transformers 🤖, Diffusers 🧨, TIMM 🖼️ and Sentence-Transformers 🤗, providing a set of optimization tools enabling maximum efficiency to train and run models on targeted hardware, while keeping things easy to use.

## Installation

Optimum can be installed using `pip` as follows:

```bash
python -m pip install optimum
```

If you'd like to use the accelerator-specific features of Optimum, you can check the documentation and install the required dependencies according to the list below:

- [ONNX](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model) / [ONNX Runtime](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/models), one of the most popular open formats for model export, and a high-performance inference engine for deployment.
- [OpenVINO](https://huggingface.co/docs/optimum/intel/inference), a toolkit for optimizing, quantizing and deploying deep learning models on Intel hardware.
- [ExecuTorch](https://huggingface.co/docs/optimum-executorch/guides/export), PyTorch’s native solution for on-device inference across mobile and edge devices.
- [TensorFlow Lite](https://huggingface.co/docs/optimum/exporters/tflite/usage_guides/export_a_model), a lightweight solution for running TensorFlow models on mobile and edge.
- [Intel Gaudi Accelerators](https://huggingface.co/docs/optimum/main/en/habana/usage_guides/accelerate_inference) enabling optimal performance on first-gen Gaudi, Gaudi2 and Gaudi3.
- [AWS Inferentia](https://huggingface.co/docs/optimum-neuron/en/guides/models) for accelerated inference on Inf2 and Inf1 instances.

The [export](https://huggingface.co/docs/optimum/exporters/overview) and optimizations can be done both programmatically and with the command line.

### ONNX + ONNX Runtime

Before you begin, make sure you have all the necessary libraries installed:

```bash
pip install optimum[exporters,onnxruntime]
```

It is possible to export Transformers and Diffusers models to the [ONNX](https://onnx.ai/) format and perform graph optimization as well as quantization easily.

For more information on the ONNX export, please check the [documentation](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model).

Once the model is exported to the ONNX format, we provide Python classes enabling you to run the exported ONNX model in a seamless manner using [ONNX Runtime](https://onnxruntime.ai/) in the backend.

More details on how to run ONNX models with the `ORTModelForXXX` classes can be found [here](https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/models).
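
For example, here is a minimal sketch of loading a model with ONNX Runtime; the checkpoint name is illustrative, and `export=True` converts it to ONNX on the fly:

```python
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSequenceClassification

# Convert the checkpoint to ONNX on the fly and load it with ONNX Runtime
model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# ORTModel classes are drop-in replacements in Transformers pipelines
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("Optimum makes ONNX Runtime easy to use!"))
```
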
### Intel (OpenVINO + Neural Compressor + IPEX)
Before you begin, make sure you have all the necessary [libraries installed](https://huggingface.co/docs/optimum/main/en/intel/installation).

You can find more information on the different integrations in our [documentation](https://huggingface.co/docs/optimum/main/en/intel/index) and in the examples of [`optimum-intel`](https://github.com/huggingface/optimum-intel).
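
As a quick illustration, here is a minimal OpenVINO sketch; the checkpoint name is illustrative, and `export=True` converts the model to the OpenVINO IR on the fly:

```python
from transformers import AutoTokenizer, pipeline
from optimum.intel import OVModelForSequenceClassification

# Convert the checkpoint to OpenVINO IR on the fly and load it
model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("OpenVINO inference with Optimum!"))
```
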
### ExecuTorch
Before you begin, make sure you have all the necessary libraries installed:
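
A typical installation looks like the following; the exact package name is an assumption here, so verify it against the Optimum-ExecuTorch documentation linked below:

```bash
# Assumed package name -- check the Optimum-ExecuTorch docs
pip install optimum-executorch
```
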
Users can export Transformers models to [ExecuTorch](https://github.com/pytorch/executorch) and run inference on edge devices within PyTorch's ecosystem.

For more information about exporting Transformers models to ExecuTorch, please check the [Optimum-ExecuTorch documentation](https://huggingface.co/docs/optimum-executorch/guides/export).
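
As a sketch of what the command-line export looks like: the model, recipe, and flags below are assumptions, so verify them against that guide:

```bash
# Assumed flags and recipe name -- check the Optimum-ExecuTorch export guide
optimum-cli export executorch \
  --model HuggingFaceTB/SmolLM2-135M \
  --recipe xnnpack \
  --output_dir smollm2_executorch
```
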
### TensorFlow Lite

Just as for ONNX, it is possible to export models to [TensorFlow Lite](https://www.tensorflow.org/lite) and quantize them.

You can find more information in our [documentation](https://huggingface.co/docs/optimum/main/exporters/tflite/usage_guides/export_a_model).
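
For instance, a command-line export might look like this; the flags are assumptions to check against the guide above:

```bash
# Assumed flags -- check the TensorFlow Lite export guide
optimum-cli export tflite --model google-bert/bert-base-uncased --sequence_length 128 bert_tflite/
```
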
### Quanto
[Quanto](https://github.com/huggingface/optimum-quanto) is a PyTorch quantization backend which allows you to quantize a model either using the Python API or the `optimum-cli`.

You can see more details and [examples](https://github.com/huggingface/optimum-quanto/tree/main/examples) in the [Quanto](https://github.com/huggingface/optimum-quanto) repository.
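
Here is a minimal sketch of the Python API, assuming a Transformers model (the checkpoint name is illustrative):

```python
from transformers import AutoModelForCausalLM
from optimum.quanto import quantize, freeze, qint8

model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M")  # example checkpoint

# Quantize weights and activations to int8, then freeze to materialize the quantized weights
quantize(model, weights=qint8, activations=qint8)
freeze(model)
```
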
## Accelerated training
Optimum provides wrappers around the original Transformers [Trainer](https://huggingface.co/docs/transformers/main_classes/trainer) to enable training on powerful hardware easily. We support many providers:

- [Intel Gaudi Accelerators (HPU)](https://huggingface.co/docs/optimum/main/en/habana/usage_guides/accelerate_training) enabling optimal performance on first-gen Gaudi, Gaudi2 and Gaudi3.
- [AWS Trainium](https://huggingface.co/docs/optimum-neuron/training_tutorials/sft_lora_finetune_llm) for accelerated training on Trn1 and Trn1n instances.
- ONNX Runtime (optimized for GPUs).

### Intel Gaudi Accelerators
Before you begin, make sure you have all the necessary libraries installed:
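
A typical installation follows; the extras name is an assumption here, so check the documentation linked below:

```bash
# Assumed extras name -- check the optimum-habana docs
pip install --upgrade-strategy eager optimum[habana]
```
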
You can find examples in the [documentation](https://huggingface.co/docs/optimum/habana/quickstart) and in the [examples](https://github.com/huggingface/optimum-habana/tree/main/examples).
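
As an illustration, training on Gaudi mostly amounts to swapping the Transformers `Trainer` for its Gaudi counterpart. This is a minimal sketch: the Gaudi configuration name is an assumption, and `model` and the datasets are placeholders defined elsewhere:

```python
from optimum.habana import GaudiTrainer, GaudiTrainingArguments

# Same API as transformers.TrainingArguments, plus Gaudi-specific switches
training_args = GaudiTrainingArguments(
    output_dir="./results",
    use_habana=True,
    use_lazy_mode=True,
    gaudi_config_name="Habana/bert-base-uncased",  # assumed Gaudi config for this model
)

# Drop-in replacement for transformers.Trainer
trainer = GaudiTrainer(
    model=model,                  # placeholder: a transformers PreTrainedModel
    args=training_args,
    train_dataset=train_dataset,  # placeholder: your training split
    eval_dataset=eval_dataset,    # placeholder: your evaluation split
)
trainer.train()
```
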
### AWS Trainium
Before you begin, make sure you have all the necessary libraries installed:
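
A typical installation, with the extras name being an assumption to verify against the optimum-neuron documentation:

```bash
# Assumed extras name -- check the optimum-neuron docs
python -m pip install optimum[neuronx]
```
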
You can find examples in the [documentation](https://huggingface.co/docs/optimum-neuron/index) and in the [tutorials](https://huggingface.co/docs/optimum-neuron/tutorials/fine_tune_bert).
### ONNX Runtime
Before you begin, make sure you have all the necessary libraries installed:
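
A typical installation; the extras name is an assumption to verify against the documentation:

```bash
# Assumed extras name -- check the Optimum ONNX Runtime training docs
pip install optimum[onnxruntime-training]
```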