From 0e37fc8d3d2051a5fd2e63e9b3c8d307af5b48cd Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 15:51:58 -0700
Subject: [PATCH 01/17] Update

[ghstack-poisoned]
---
 .github/workflows/inductor-perf-test-nightly-h100.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/inductor-perf-test-nightly-h100.yml b/.github/workflows/inductor-perf-test-nightly-h100.yml
index c94996f58002..4807f4a29b08 100644
--- a/.github/workflows/inductor-perf-test-nightly-h100.yml
+++ b/.github/workflows/inductor-perf-test-nightly-h100.yml
@@ -86,6 +86,11 @@ jobs:
     needs: get-label-type
     with:
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      # Use a bigger runner here because CUDA_ARCH 9.0 is only built for H100
+      # or newer GPUs, so it doesn't benefit much from existing compiler cache
+      # from trunk. Also use a memory-intensive runner here because memory is
+      # usually the bottleneck
+      runner: linux.12xlarge.memory
       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '9.0'

From ec8fa3c1da2acff020a71e6ca2cfaf10340d597b Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 15:52:04 -0700
Subject: [PATCH 02/17] Update

[ghstack-poisoned]
---
 .github/workflows/_linux-build.yml | 11 -----------
 .github/workflows/periodic.yml     |  2 --
 2 files changed, 13 deletions(-)

diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml
index f1e2f917f4bc..dd8113cdf337 100644
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@@ -16,11 +16,6 @@ on:
         type: boolean
         default: true
         description: If set, upload generated build artifacts.
-      build-with-debug:
-        required: false
-        type: boolean
-        default: false
-        description: If set, build in debug mode.
       sync-tag:
         required: false
         type: string
@@ -87,7 +82,6 @@ on:
         required: false
         type: number
         default: 1
-
       allow-reuse-old-whl:
         description: |
           If set, the build try to pull an old wheel from s3 that was built on a
@@ -106,7 +100,6 @@ on:
         description: |
           FB app token to write to scribe endpoint
 
-
     outputs:
       docker-image:
         value: ${{ jobs.build.outputs.docker-image }}
@@ -247,8 +240,6 @@ jobs:
         env:
           BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
           BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          # TODO duplicated
-          AWS_DEFAULT_REGION: us-east-1
           PR_NUMBER: ${{ github.event.pull_request.number }}
           SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
           # Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
@@ -260,7 +251,6 @@ jobs:
           DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
           DOCKER_IMAGE_S390X: ${{ inputs.docker-image-name }}
           XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
-          DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
           OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
           HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
           SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
@@ -295,7 +285,6 @@ jobs:
           container_name=$(docker run \
             -e BUILD_ENVIRONMENT \
             -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
             -e PR_NUMBER \
             -e SHA1 \
             -e BRANCH \
diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml
index 643d40e4d381..976fb241c99f 100644
--- a/.github/workflows/periodic.yml
+++ b/.github/workflows/periodic.yml
@@ -157,7 +157,6 @@ jobs:
           { config: "multigpu", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
           { config: "multigpu", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
         ]}
-      build-with-debug: false
     secrets: inherit
 
   linux-jammy-cuda12_8-py3_9-gcc9-test:
@@ -178,7 +177,6 @@ jobs:
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-debug
       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9
-      build-with-debug: true
       test-matrix: |
         { include: [
           { config: "default", shard: 1, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] },

From 3aec24e02e71c76bb6ee24b29c5f166e5dbb0910 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 15:52:09 -0700
Subject: [PATCH 03/17] Update

[ghstack-poisoned]
---
 .ci/pytorch/build.sh               | 14 +++++
 .ci/pytorch/common_utils.sh        | 94 ++++++++++++++++++++++--------
 .ci/pytorch/test.sh                | 10 ++--
 .github/workflows/_linux-build.yml |  9 +++
 4 files changed, 97 insertions(+), 30 deletions(-)

diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh
index 994bd179e464..4a622badbcf7 100755
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@@ -306,6 +306,20 @@ else
     fi
     pip_install_whl "$(echo dist/*.whl)"
 
+    if [[ -n "${BUILD_ADDITIONAL_PACKAGES}" ]]; then
+      if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *vision* ]]; then
+        install_torchvision
+      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *audio* ]]; then
+        install_torchaudio
+      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *text* ]]; then
+        install_torchtext
+      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *torchrec* || "${BUILD_ADDITIONAL_PACKAGES}" == *fbgemm* ]]; then
+        install_torchrec_and_fbgemm
+      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *torchao* ]]; then
+        install_torchao
+      fi
+    fi
+
     if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
       echo "Checking that xpu is compiled"
       pushd dist/
diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
index 3dbc2ece9e70..69a5b7ad3795 100644
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@@ -78,6 +78,34 @@ function pip_install_whl() {
   fi
 }
 
+function pip_build_and_install() {
+  local build_target=$1
+  local wheel_dir=$2
+
+  local found_whl=0
+  for file in "${wheel_dir}"/*.whl
+  do
+    if [[ -f "${file}" ]]; then
+      found_whl=1
+      break
+    fi
+  done
+
+  # Build the wheel if it doesn't exist
+  if [ "${found_whl}" == "0" ]; then
+    python3 -m pip wheel \
+      --no-build-isolation \
+      --no-deps \
+      --no-use-pep517 \
+      -w "${wheel_dir}" \
+      "${build_target}"
+  fi
+
+  for file in "${wheel_dir}"/*.whl
+  do
+    pip_install_whl "${file}"
+  done
+}
 
 function pip_install() {
   # retry 3 times
@@ -124,14 +152,7 @@ function get_pinned_commit() {
 function install_torchaudio() {
   local commit
   commit=$(get_pinned_commit audio)
-  if [[ "$1" == "cuda" ]]; then
-    # TODO: This is better to be passed as a parameter from _linux-test workflow
-    # so that it can be consistent with what is set in build
-    TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install --no-use-pep517 "git+https://github.com/pytorch/audio.git@${commit}"
-  else
-    pip_install --no-use-pep517 "git+https://github.com/pytorch/audio.git@${commit}"
-  fi
-
+  pip_build_and_install "git+https://github.com/pytorch/audio.git@${commit}" dist/audio
 }
 
 function install_torchtext() {
@@ -139,8 +160,8 @@ function install_torchtext() {
   local text_commit
   data_commit=$(get_pinned_commit data)
   text_commit=$(get_pinned_commit text)
-  pip_install --no-use-pep517 "git+https://github.com/pytorch/data.git@${data_commit}"
-  pip_install --no-use-pep517 "git+https://github.com/pytorch/text.git@${text_commit}"
+  pip_build_and_install "git+https://github.com/pytorch/data.git@${data_commit}" dist/data
+  pip_build_and_install "git+https://github.com/pytorch/text.git@${text_commit}" dist/text
 }
 
 function install_torchvision() {
@@ -153,7 +174,7 @@ function install_torchvision() {
     echo 'char* dlerror(void) { return "";}'|gcc -fpic -shared -o "${HOME}/dlerror.so" -x c -
     LD_PRELOAD=${orig_preload}:${HOME}/dlerror.so
   fi
-  pip_install --no-use-pep517 "git+https://github.com/pytorch/vision.git@${commit}"
+  pip_build_and_install "git+https://github.com/pytorch/vision.git@${commit}" dist/vision
   if [ -n "${LD_PRELOAD}" ]; then
     LD_PRELOAD=${orig_preload}
   fi
@@ -173,25 +194,48 @@ function install_torchrec_and_fbgemm() {
 
   if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then
     # install torchrec first because it installs fbgemm nightly on top of rocm fbgemm
-    pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
+    pip_build_and_install "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" dist/torchrec
     pip_uninstall fbgemm-gpu-nightly
 
     pip_install tabulate  # needed for newer fbgemm
     pip_install patchelf  # needed for rocm fbgemm
-    git clone --recursive https://github.com/pytorch/fbgemm
-    pushd fbgemm/fbgemm_gpu
-    git checkout "${fbgemm_commit}"
-    python setup.py install \
-      --package_variant=rocm \
-      -DHIP_ROOT_DIR="${ROCM_PATH}" \
-      -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
-      -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
-    popd
+
+    local wheel_dir=dist/fbgemm_gpu
+    local found_whl=0
+    for file in "${wheel_dir}"/*.whl
+    do
+      if [[ -f "${file}" ]]; then
+        found_whl=1
+        break
+      fi
+    done
+
+    # Build the wheel if it doesn't exist
+    if [ "${found_whl}" == "0" ]; then
+      git clone --recursive https://github.com/pytorch/fbgemm
+      pushd fbgemm/fbgemm_gpu
+      git checkout "${fbgemm_commit}"
+      python setup.py bdist_wheel \
+        --package_variant=rocm \
+        -DHIP_ROOT_DIR="${ROCM_PATH}" \
+        -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
+        -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
+      popd
+
+      # Save the wheel before cleaning up
+      mkdir -p dist/fbgemm_gpu
+      cp fbgemm/fbgemm_gpu/dist/*.whl dist/fbgemm_gpu
+    fi
+
+    for file in "${wheel_dir}"/*.whl
+    do
+      pip_install_whl "${file}"
+    done
+
     rm -rf fbgemm
   else
-    # See https://github.com/pytorch/pytorch/issues/106971
-    CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
-    pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
+    pip_build_and_install "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" dist/torchrec
+    pip_build_and_install "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#subdirectory=fbgemm_gpu" dist/fbgemm_gpu
   fi
 }
 
@@ -234,7 +278,7 @@ function checkout_install_torchbench() {
 function install_torchao() {
   local commit
   commit=$(get_pinned_commit torchao)
-  pip_install --no-use-pep517 "git+https://github.com/pytorch/ao.git@${commit}"
+  pip_build_and_install "git+https://github.com/pytorch/ao.git@${commit}" dist/ao
 }
 
 function print_sccache_stats() {
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index a51a7e472c97..78542dc628cf 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -1660,23 +1660,23 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
   id=$((SHARD_NUMBER-1))
   test_dynamo_benchmark timm_models "$id"
 elif [[ "${TEST_CONFIG}" == cachebench ]]; then
-  install_torchaudio cuda
+  install_torchaudio
   install_torchvision
   checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco
   PYTHONPATH=$(pwd)/torchbench test_cachebench
 elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
-  install_torchaudio cpu
+  install_torchaudio
   install_torchvision
   checkout_install_torchbench nanogpt
   PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
 elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
   if [[ "${TEST_CONFIG}" == *cpu* ]]; then
-    install_torchaudio cpu
+    install_torchaudio
   else
-    install_torchaudio cuda
+    install_torchaudio
   fi
   install_torchvision
-  TORCH_CUDA_ARCH_LIST="8.0;8.6" install_torchao
+  install_torchao
   id=$((SHARD_NUMBER-1))
   # https://github.com/opencv/opencv-python/issues/885
   pip_install opencv-python==4.8.0.74
diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml
index dd8113cdf337..586f96235ada 100644
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@@ -89,6 +89,13 @@ on:
         required: false
         type: boolean
         default: true
+      build-additional-packages:
+        description: |
+          If set, the build job will also build these packages and save their
+          wheels as artifacts
+        required: false
+        type: string
+        default: ""
 
     secrets:
       HUGGING_FACE_HUB_TOKEN:
@@ -254,6 +261,7 @@ jobs:
           OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
           HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
           SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+          BUILD_ADDITIONAL_PACKAGES: ${{ inputs.build-additional-packages }}
         run: |
           START_TIME=$(date +%s)
           if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
@@ -299,6 +307,7 @@ jobs:
             -e HUGGING_FACE_HUB_TOKEN \
             -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
             -e USE_SPLIT_BUILD \
+            -e BUILD_ADDITIONAL_PACKAGES \
             --memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
             --memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
             --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \

From 30c21cb1b854480090bc4fdbcbab5a394b7eb6b4 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 15:52:15 -0700
Subject: [PATCH 04/17] Update

[ghstack-poisoned]
---
 .github/workflows/inductor-perf-test-nightly-h100.yml | 1 +
 .github/workflows/inductor-perf-test-nightly.yml      | 3 +++
 .github/workflows/inductor-periodic.yml               | 1 +
 .github/workflows/test-h100.yml                       | 3 ++-
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/inductor-perf-test-nightly-h100.yml b/.github/workflows/inductor-perf-test-nightly-h100.yml
index 4807f4a29b08..f0429baf54f3 100644
--- a/.github/workflows/inductor-perf-test-nightly-h100.yml
+++ b/.github/workflows/inductor-perf-test-nightly-h100.yml
@@ -119,6 +119,7 @@ jobs:
           { config: "inductor_torchbench_perf_cuda_h100", shard: 9, num_shards: 9, runner: "linux.aws.h100" },
         ]}
       selected-test-configs: ${{ inputs.benchmark_configs }}
+      build-additional-packages: "vision audio text fbgemm torchao"
     secrets: inherit
 
   test-periodically:
diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml
index 015204473339..27c41ca477d4 100644
--- a/.github/workflows/inductor-perf-test-nightly.yml
+++ b/.github/workflows/inductor-perf-test-nightly.yml
@@ -86,6 +86,8 @@ jobs:
     needs: get-label-type
     with:
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      # Every bit to make perf run faster helps
+      runner: linux.12xlarge.memory
       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
       cuda-arch-list: '8.0'
@@ -112,6 +114,7 @@ jobs:
           { config: "cachebench", shard: 2, num_shards: 2, runner: "linux.aws.a100" },
         ]}
       selected-test-configs: ${{ inputs.benchmark_configs }}
+      build-additional-packages: "vision audio text fbgemm torchao"
     secrets: inherit
 
   test-nightly:
diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml
index 2e16c2e403fb..8b36624f7746 100644
--- a/.github/workflows/inductor-periodic.yml
+++ b/.github/workflows/inductor-periodic.yml
@@ -135,6 +135,7 @@ jobs:
       build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
       docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-smoke-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-smoke-build.outputs.test-matrix }}
+      build-additional-packages: "vision audio text fbgemm torchao"
     secrets: inherit
 
   linux-jammy-cpu-py3_9-gcc11-periodic-dynamo-benchmarks-build:
diff --git a/.github/workflows/test-h100.yml b/.github/workflows/test-h100.yml
index 40eff83ba58d..67c5f53b877e 100644
--- a/.github/workflows/test-h100.yml
+++ b/.github/workflows/test-h100.yml
@@ -37,7 +37,7 @@ jobs:
     needs: get-label-type
     with:
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      runner: "linux.12xlarge"
+      runner: linux.12xlarge.memory
       build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90
       docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
       cuda-arch-list: '9.0'
@@ -45,6 +45,7 @@ jobs:
         { include: [
           { config: "smoke", shard: 1, num_shards: 1, runner: "linux.aws.h100" },
         ]}
+      build-additional-packages: "vision audio text fbgemm torchao"
     secrets: inherit
 
   linux-jammy-cuda12_8-py3_10-gcc11-sm90-test:

From 444df0faa1902563f8913f68aa128f3f3e34d659 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 16:33:57 -0700
Subject: [PATCH 05/17] Update

[ghstack-poisoned]
---
 .ci/pytorch/build.sh | 16 ++++++++++++----
 .ci/pytorch/test.sh  |  6 +-----
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh
index 4a622badbcf7..1fcc9f72053d 100755
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@@ -309,13 +309,21 @@ else
     if [[ -n "${BUILD_ADDITIONAL_PACKAGES}" ]]; then
       if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *vision* ]]; then
         install_torchvision
-      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *audio* ]]; then
+      fi
+
+      if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *audio* ]]; then
         install_torchaudio
-      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *text* ]]; then
+      fi
+
+      if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *text* ]]; then
         install_torchtext
-      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *torchrec* || "${BUILD_ADDITIONAL_PACKAGES}" == *fbgemm* ]]; then
+      fi
+
+      if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *torchrec* || "${BUILD_ADDITIONAL_PACKAGES}" == *fbgemm* ]]; then
         install_torchrec_and_fbgemm
-      elif [[ "${BUILD_ADDITIONAL_PACKAGES}" == *torchao* ]]; then
+      fi
+
+      if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *torchao* ]]; then
         install_torchao
       fi
     fi
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index 78542dc628cf..6e89cd78b98d 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -1670,11 +1670,7 @@ elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
   checkout_install_torchbench nanogpt
   PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
 elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
-  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
-    install_torchaudio
-  else
-    install_torchaudio
-  fi
+  install_torchaudio
   install_torchvision
   install_torchao
   id=$((SHARD_NUMBER-1))

From a151d802c3cbf998e101bf0276aa547a5cdb94ee Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 17:21:04 -0700
Subject: [PATCH 06/17] Update

[ghstack-poisoned]
---
 .ci/docker/build.sh                           |  2 +-
 .ci/docker/ci_commit_pins/torchbench.txt      |  1 +
 .../common/install_inductor_benchmark_deps.sh | 21 ++++++++++++++++
 .ci/pytorch/common_utils.sh                   | 24 -------------------
 .ci/pytorch/test.sh                           |  8 -------
 .github/ci_commit_pins/torchbench.txt         |  1 -
 6 files changed, 23 insertions(+), 34 deletions(-)
 create mode 120000 .ci/docker/ci_commit_pins/torchbench.txt
 delete mode 100644 .github/ci_commit_pins/torchbench.txt

diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index d6cba6659db7..d8de42368200 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -276,7 +276,7 @@ case "$tag" in
     NINJA_VERSION=1.9.0
     TRITON=yes
     ;;
-    pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
+  pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
     ANACONDA_PYTHON_VERSION=3.9
     GCC_VERSION=11
     VISION=yes
diff --git a/.ci/docker/ci_commit_pins/torchbench.txt b/.ci/docker/ci_commit_pins/torchbench.txt
new file mode 120000
index 000000000000..eb6cadaa0696
--- /dev/null
+++ b/.ci/docker/ci_commit_pins/torchbench.txt
@@ -0,0 +1 @@
+.ci/docker/ci_commit_pins/torchbench.txt
\ No newline at end of file
diff --git a/.ci/docker/common/install_inductor_benchmark_deps.sh b/.ci/docker/common/install_inductor_benchmark_deps.sh
index 7312dce170db..5e6d77007f83 100644
--- a/.ci/docker/common/install_inductor_benchmark_deps.sh
+++ b/.ci/docker/common/install_inductor_benchmark_deps.sh
@@ -19,7 +19,28 @@ function install_timm() {
   conda_run pip uninstall -y torch torchvision triton
 }
 
+function install_torchbench() {
+  local commit
+  commit=$(get_pinned_commit torchbench)
+  git clone https://github.com/pytorch/benchmark torchbench
+  pushd torchbench
+  git checkout "$commit"
+
+  # Stable packages are ok here, just to satify TorchBench check
+  pip_install torch torchvision torchaudio
+  python install.py --continue_on_fail
+
+  # TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
+  # is regressing speedup metric. This needs to be investigated further
+  pip install transformers==4.38.1
+
+  echo "Print all dependencies after TorchBench is installed"
+  python -mpip freeze
+  popd
+}
+
 # Pango is needed for weasyprint which is needed for doctr
 conda_install pango
 install_huggingface
 install_timm
+install_torchbench
diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
index 69a5b7ad3795..a101965cac90 100644
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@@ -251,30 +251,6 @@ function clone_pytorch_xla() {
   fi
 }
 
-function checkout_install_torchbench() {
-  local commit
-  commit=$(get_pinned_commit torchbench)
-  git clone https://github.com/pytorch/benchmark torchbench
-  pushd torchbench
-  git checkout "$commit"
-
-  if [ "$1" ]; then
-    python install.py --continue_on_fail models "$@"
-  else
-    # Occasionally the installation may fail on one model but it is ok to continue
-    # to install and test other models
-    python install.py --continue_on_fail
-  fi
-
-  # TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
-  # is regressing speedup metric. This needs to be investigated further
-  pip install transformers==4.38.1
-
-  echo "Print all dependencies after TorchBench is installed"
-  python -mpip freeze
-  popd
-}
-
 function install_torchao() {
   local commit
   commit=$(get_pinned_commit torchao)
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index 6e89cd78b98d..e650fdf369c5 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -1662,12 +1662,10 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
 elif [[ "${TEST_CONFIG}" == cachebench ]]; then
   install_torchaudio
   install_torchvision
-  checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco
   PYTHONPATH=$(pwd)/torchbench test_cachebench
 elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
   install_torchaudio
   install_torchvision
-  checkout_install_torchbench nanogpt
   PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
 elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
   install_torchaudio
@@ -1677,18 +1675,12 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
   # https://github.com/opencv/opencv-python/issues/885
   pip_install opencv-python==4.8.0.74
   if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
-    checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
     PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
   elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
-    checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
-      llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
-      functorch_maml_omniglot yolov3 mobilenet_v2 resnext50_32x4d densenet121 mnasnet1_0
     PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
   elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
-    checkout_install_torchbench
     TORCHBENCHPATH=$(pwd)/torchbench test_torchbench_gcp_smoketest
   else
-    checkout_install_torchbench
     # Do this after checkout_install_torchbench to ensure we clobber any
     # nightlies that torchbench may pull in
     if [[ "${TEST_CONFIG}" != *cpu* ]]; then
diff --git a/.github/ci_commit_pins/torchbench.txt b/.github/ci_commit_pins/torchbench.txt
deleted file mode 100644
index efbc3ceeb2af..000000000000
--- a/.github/ci_commit_pins/torchbench.txt
+++ /dev/null
@@ -1 +0,0 @@
-e03a63be43e33596f7f0a43b0f530353785e4a59

From 0ce8369bec0898fb31f98a325b8fdc4721e029be Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 17:21:04 -0700
Subject: [PATCH 07/17] Update

[ghstack-poisoned]
---
 .github/workflows/inductor-perf-test-b200.yml | 154 ------------------
 1 file changed, 154 deletions(-)
 delete mode 100644 .github/workflows/inductor-perf-test-b200.yml

diff --git a/.github/workflows/inductor-perf-test-b200.yml b/.github/workflows/inductor-perf-test-b200.yml
deleted file mode 100644
index 4d328475250e..000000000000
--- a/.github/workflows/inductor-perf-test-b200.yml
+++ /dev/null
@@ -1,154 +0,0 @@
-name: inductor-perf-b200
-
-on:
-  # DEBUG: TO BE CLEAN UP BEFORE LANDING
-  pull_request:
-    paths:
-      - .github/workflows/inductor-perf-test-b200.yml
-  schedule:
-    - cron: 0 7 * * 1-6
-    - cron: 0 7 * * 0
-  # NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
-  # out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
-  workflow_dispatch:
-    inputs:
-      training:
-        description: Run training (on by default)?
-        required: false
-        type: boolean
-        default: true
-      inference:
-        description: Run inference (on by default)?
-        required: false
-        type: boolean
-        default: true
-      default:
-        description: Run inductor_default?
-        required: false
-        type: boolean
-        default: false
-      dynamic:
-        description: Run inductor_dynamic_shapes?
-        required: false
-        type: boolean
-        default: false
-      cppwrapper:
-        description: Run inductor_cpp_wrapper?
-        required: false
-        type: boolean
-        default: false
-      cudagraphs:
-        description: Run inductor_cudagraphs?
-        required: false
-        type: boolean
-        default: true
-      freezing_cudagraphs:
-        description: Run inductor_cudagraphs with freezing for inference?
-        required: false
-        type: boolean
-        default: false
-      aotinductor:
-        description: Run aot_inductor for inference?
-        required: false
-        type: boolean
-        default: false
-      maxautotune:
-        description: Run inductor_max_autotune?
-        required: false
-        type: boolean
-        default: false
-      benchmark_configs:
-        description: The list of configs used the benchmark
-        required: false
-        type: string
-        default: inductor_huggingface_perf_cuda_b200,inductor_timm_perf_cuda_b200,inductor_torchbench_perf_cuda_b200
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
-  cancel-in-progress: true
-
-permissions:
-  id-token: write
-  contents: read
-
-jobs:
-  get-label-type:
-    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
-    if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
-    with:
-      triggering_actor: ${{ github.triggering_actor }}
-      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
-      curr_branch: ${{ github.head_ref || github.ref_name }}
-      curr_ref_type: ${{ github.ref_type }}
-      opt_out_experiments: lf
-
-  build:
-    name: cuda12.8-py3.10-gcc9-sm100
-    uses: ./.github/workflows/_linux-build.yml
-    needs: get-label-type
-    with:
-      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
-      docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
-      cuda-arch-list: '10.0'
-      test-matrix: |
-        { include: [
-          { config: "inductor_huggingface_perf_cuda_b200", shard: 1, num_shards: 1, runner: "B200" },
-          { config: "inductor_timm_perf_cuda_b200", shard: 1, num_shards: 1, runner: "B200" },
-          { config: "inductor_torchbench_perf_cuda_b200", shard: 1, num_shards: 1, runner: "B200" },
-        ]}
-      selected-test-configs: ${{ inputs.benchmark_configs }}
-    secrets: inherit
-
-  test-periodically:
-    name: cuda12.8-py3.10-gcc9-sm100
-    uses: ./.github/workflows/_linux-test.yml
-    needs: build
-    if: github.event.schedule == '0 7 * * 1-6'
-    with:
-      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
-      dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
-      docker-image: ${{ needs.build.outputs.docker-image }}
-      test-matrix: ${{ needs.build.outputs.test-matrix }}
-      aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
-      timeout-minutes: 720
-      disable-monitor: false
-      monitor-log-interval: 15
-      monitor-data-collect-interval: 4
-    secrets: inherit
-
-  test-weekly:
-    name: cuda12.8-py3.10-gcc9-sm100
-    uses: ./.github/workflows/_linux-test.yml
-    needs: build
-    if: github.event.schedule == '0 7 * * 0'
-    with:
-      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
-      dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
-      docker-image: ${{ needs.build.outputs.docker-image }}
-      test-matrix: ${{ needs.build.outputs.test-matrix }}
-      timeout-minutes: 1440
-      aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
-      disable-monitor: false
-      monitor-log-interval: 15
-      monitor-data-collect-interval: 4
-    secrets: inherit
-
-  test:
-    name: cuda12.8-py3.10-gcc9-sm100
-    uses: ./.github/workflows/_linux-test.yml
-    needs: build
-    with:
-      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
-      # DEBUG: TO BE CLEAN UP BEFORE LANDING
-      # dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
-      dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
-      docker-image: ${{ needs.build.outputs.docker-image }}
-      test-matrix: ${{ needs.build.outputs.test-matrix }}
-      aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
-      timeout-minutes: 720
-      disable-monitor: false
-      monitor-log-interval: 15
-      monitor-data-collect-interval: 4
-    secrets: inherit

From 9fe4955c3b22b44ea13fe4233ca86272bb63fe4d Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 17:23:03 -0700
Subject: [PATCH 08/17] Update

[ghstack-poisoned]
---
 .ci/docker/ci_commit_pins/torchbench.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 mode change 120000 => 100644 .ci/docker/ci_commit_pins/torchbench.txt

diff --git a/.ci/docker/ci_commit_pins/torchbench.txt b/.ci/docker/ci_commit_pins/torchbench.txt
deleted file mode 120000
index eb6cadaa0696..000000000000
--- a/.ci/docker/ci_commit_pins/torchbench.txt
+++ /dev/null
@@ -1 +0,0 @@
-.ci/docker/ci_commit_pins/torchbench.txt
\ No newline at end of file
diff --git a/.ci/docker/ci_commit_pins/torchbench.txt b/.ci/docker/ci_commit_pins/torchbench.txt
new file mode 100644
index 000000000000..efbc3ceeb2af
--- /dev/null
+++ b/.ci/docker/ci_commit_pins/torchbench.txt
@@ -0,0 +1 @@
+e03a63be43e33596f7f0a43b0f530353785e4a59

From 9ff8af54c2a9188280cf346647c159190be62c22 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 18:14:34 -0700
Subject: [PATCH 09/17] Update

[ghstack-poisoned]
---
 .ci/pytorch/build.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh
index 1fcc9f72053d..e9021b655848 100755
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@@ -315,10 +315,6 @@ else
         install_torchaudio
       fi
 
-      if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *text* ]]; then
-        install_torchtext
-      fi
-
       if [[ "${BUILD_ADDITIONAL_PACKAGES}" == *torchrec* || "${BUILD_ADDITIONAL_PACKAGES}" == *fbgemm* ]]; then
         install_torchrec_and_fbgemm
       fi

From 2bc4a7d6b62d39d8b0164804d42a455c391da9af Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 17 Jul 2025 20:24:28 -0700
Subject: [PATCH 10/17] Update

[ghstack-poisoned]
---
 .../common/install_inductor_benchmark_deps.sh       | 13 ++++++++-----
 .ci/docker/requirements-ci.txt                      |  7 ++++++-
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/.ci/docker/common/install_inductor_benchmark_deps.sh b/.ci/docker/common/install_inductor_benchmark_deps.sh
index 5e6d77007f83..2e0780f889e1 100644
--- a/.ci/docker/common/install_inductor_benchmark_deps.sh
+++ b/.ci/docker/common/install_inductor_benchmark_deps.sh
@@ -15,8 +15,6 @@ function install_timm() {
   commit=$(get_pinned_commit timm)
 
   pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
-  # Clean up
-  conda_run pip uninstall -y torch torchvision triton
 }
 
 function install_torchbench() {
@@ -26,8 +24,6 @@ function install_torchbench() {
   pushd torchbench
   git checkout "$commit"
 
-  # Stable packages are ok here, just to satify TorchBench check
-  pip_install torch torchvision torchaudio
   python install.py --continue_on_fail
 
   # TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
@@ -41,6 +37,13 @@ function install_torchbench() {
 
 # Pango is needed for weasyprint which is needed for doctr
 conda_install pango
+
+# Stable packages are ok here, just to satisfy TorchBench check
+pip_install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
+
+install_torchbench
 install_huggingface
 install_timm
-install_torchbench
+
+# Clean up
+conda_run pip uninstall -y torch torchvision torchaudio triton
diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
index 9c8251989477..944b1fb35b36 100644
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@@ -361,7 +361,6 @@ pwlf==2.2.1
 #Pinned versions: 2.2.1
 #test that import: test_sac_estimator.py
 
-
 # To build PyTorch itself
 pyyaml
 pyzstd
@@ -389,3 +388,9 @@ tlparse==0.3.30
 cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
 #Description: required for testing CUDAGraph::raw_cuda_graph(). See https://nvidia.github.io/cuda-python/cuda-bindings/latest/support.html for how this version was chosen. Note "Any fix in the latest bindings would be backported to the prior major version" means that only the newest version of cuda-bindings will get fixes. Depending on the latest version of 12.x is okay because all 12.y versions will be supported via "CUDA minor version compatibility". Pytorch builds against 13.z versions of cuda toolkit work with 12.x versions of cuda-bindings as well because newer drivers work with old toolkits.
 #test that import: test_cuda.py
+
+setuptools-git-versioning==2.1.0
+scikit-build==0.18.1
+pyre-extensions==0.0.32
+tabulate==0.9.0
+#Description: These packages are needed to build FBGEMM and torchrec on PyTorch CI

From 14a38c719b29a19f518239b5edb084838ac5d2fb Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 18 Jul 2025 09:02:44 -0700
Subject: [PATCH 11/17] Update

[ghstack-poisoned]
---
 .ci/pytorch/test.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index e650fdf369c5..c9d53101180f 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -1662,11 +1662,11 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
 elif [[ "${TEST_CONFIG}" == cachebench ]]; then
   install_torchaudio
   install_torchvision
-  PYTHONPATH=$(pwd)/torchbench test_cachebench
+  PYTHONPATH=/torchbench test_cachebench
 elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
   install_torchaudio
   install_torchvision
-  PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
+  PYTHONPATH=/torchbench test_verify_cachebench
 elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
   install_torchaudio
   install_torchvision
@@ -1675,22 +1675,22 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
   # https://github.com/opencv/opencv-python/issues/885
   pip_install opencv-python==4.8.0.74
   if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
-    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
+    PYTHONPATH=/torchbench test_inductor_torchbench_smoketest_perf
   elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
-    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
+    PYTHONPATH=/torchbench test_inductor_torchbench_cpu_smoketest_perf
   elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
-    TORCHBENCHPATH=$(pwd)/torchbench test_torchbench_gcp_smoketest
+    TORCHBENCHPATH=/torchbench test_torchbench_gcp_smoketest
   else
     # Do this after checkout_install_torchbench to ensure we clobber any
     # nightlies that torchbench may pull in
     if [[ "${TEST_CONFIG}" != *cpu* ]]; then
       install_torchrec_and_fbgemm
     fi
-    PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
+    PYTHONPATH=/torchbench test_dynamo_benchmark torchbench "$id"
   fi
 elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
   install_torchvision
-  PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
+  PYTHONPATH=/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
   if [[ "$SHARD_NUMBER" -eq "1" ]]; then
     test_inductor_aoti
   fi

From 550d9e7ae5a9ff8d679b4be2f0a3a9165987b74d Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 18 Jul 2025 19:26:47 -0700
Subject: [PATCH 12/17] Update

[ghstack-poisoned]
---
 .ci/docker/requirements-ci.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
index 650c4e58c8ba..facc633f6a7a 100644
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@@ -390,3 +390,9 @@ tlparse==0.3.30
 cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
 #Description: required for testing CUDAGraph::raw_cuda_graph(). See https://nvidia.github.io/cuda-python/cuda-bindings/latest/support.html for how this version was chosen. Note "Any fix in the latest bindings would be backported to the prior major version" means that only the newest version of cuda-bindings will get fixes. Depending on the latest version of 12.x is okay because all 12.y versions will be supported via "CUDA minor version compatibility". Pytorch builds against 13.z versions of cuda toolkit work with 12.x versions of cuda-bindings as well because newer drivers work with old toolkits.
 #test that import: test_cuda.py
+
+setuptools-git-versioning==2.1.0
+scikit-build==0.18.1
+pyre-extensions==0.0.32
+tabulate==0.9.0
+#Description: These packages are needed to build FBGEMM and torchrec on PyTorch CI

From 046bc530cf63adb1c26828fe45b5974145fac83d Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 18 Jul 2025 23:47:32 -0700
Subject: [PATCH 13/17] Update

[ghstack-poisoned]
---
 .ci/pytorch/common_utils.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
index 69a5b7ad3795..648bb835372d 100644
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@@ -174,7 +174,12 @@ function install_torchvision() {
     echo 'char* dlerror(void) { return "";}'|gcc -fpic -shared -o "${HOME}/dlerror.so" -x c -
     LD_PRELOAD=${orig_preload}:${HOME}/dlerror.so
   fi
+
+  if [[ -n "${TORCH_CUDA_ARCH_LIST}" ]]; then
+    export WITH_CUDA=1
+  fi
   pip_build_and_install "git+https://github.com/pytorch/vision.git@${commit}" dist/vision
+
   if [ -n "${LD_PRELOAD}" ]; then
     LD_PRELOAD=${orig_preload}
   fi

From 626d1ff0d35bf2cc964632e2d16f3f5a308e5f04 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sat, 19 Jul 2025 01:25:00 -0700
Subject: [PATCH 14/17] Update

[ghstack-poisoned]
---
 .ci/pytorch/common_utils.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
index 648bb835372d..e9994e35c418 100644
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@@ -175,7 +175,7 @@ function install_torchvision() {
     LD_PRELOAD=${orig_preload}:${HOME}/dlerror.so
   fi
 
-  if [[ -n "${TORCH_CUDA_ARCH_LIST}" ]]; then
+  if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]]; then
     export WITH_CUDA=1
   fi
   pip_build_and_install "git+https://github.com/pytorch/vision.git@${commit}" dist/vision

From 1d30995dd6e4eaaf0ffe47911d37ee8c3ee57c45 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sat, 19 Jul 2025 10:48:06 -0700
Subject: [PATCH 15/17] Update

[ghstack-poisoned]
---
 .ci/pytorch/common_utils.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
index e9994e35c418..9075fe5fb56f 100644
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@@ -176,6 +176,8 @@ function install_torchvision() {
   fi
 
   if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]]; then
+    # It is unclear whether both FORCE_CUDA and WITH_CUDA are needed, so set both to be safe
+    export FORCE_CUDA=1
     export WITH_CUDA=1
   fi
   pip_build_and_install "git+https://github.com/pytorch/vision.git@${commit}" dist/vision

From 9867f10862144f227a496a3a345ccdcf4dbd7e54 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Mon, 21 Jul 2025 10:21:48 -0700
Subject: [PATCH 16/17] Update

[ghstack-poisoned]
---
 .ci/docker/ubuntu-rocm/Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.ci/docker/ubuntu-rocm/Dockerfile b/.ci/docker/ubuntu-rocm/Dockerfile
index 2528da07c69e..8f2cc6eef958 100644
--- a/.ci/docker/ubuntu-rocm/Dockerfile
+++ b/.ci/docker/ubuntu-rocm/Dockerfile
@@ -98,8 +98,9 @@ COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps
 COPY ./common/common_utils.sh common_utils.sh
 COPY ci_commit_pins/huggingface.txt huggingface.txt
 COPY ci_commit_pins/timm.txt timm.txt
+COPY ci_commit_pins/torchbench.txt torchbench.txt
 RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
-RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
+RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
 
 # (optional) Install non-default Ninja version
 ARG NINJA_VERSION

From a0aa37a6e2c4172c672ae260abe2f50160750994 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 22 Jul 2025 17:29:39 -0700
Subject: [PATCH 17/17] Fix CI permission issues

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .ci/docker/build.sh                                  | 1 +
 .ci/docker/common/install_inductor_benchmark_deps.sh | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index d8de42368200..6d0540e88803 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -306,6 +306,7 @@ case "$tag" in
     TRITON=yes
     DOCS=yes
     UNINSTALL_DILL=yes
+    INDUCTOR_BENCHMARKS=yes
     ;;
   pytorch-linux-jammy-py3-clang12-executorch)
     ANACONDA_PYTHON_VERSION=3.10
diff --git a/.ci/docker/common/install_inductor_benchmark_deps.sh b/.ci/docker/common/install_inductor_benchmark_deps.sh
index 2e0780f889e1..bda3aa600956 100644
--- a/.ci/docker/common/install_inductor_benchmark_deps.sh
+++ b/.ci/docker/common/install_inductor_benchmark_deps.sh
@@ -33,6 +33,8 @@ function install_torchbench() {
   echo "Print all dependencies after TorchBench is installed"
   python -mpip freeze
   popd
+
+  chown -R jenkins torchbench
 }
 
 # Pango is needed for weasyprint which is needed for doctr

<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>pFad - Phonifier reborn</title>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
</head>
<body>
<h1>Pfad - The Proxy pFad of &#169; 2024 Garber Painting. All rights reserved.</h1>


<!-- Disclaimer -->
<p>Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.</p>
<br>
<p>Alternative Proxies:</p><p><a href="http://clevelandohioweatherforecast.com/php-proxy/index.php?q=https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/158613.patch" target="_blank">Alternative Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/index.php?u=https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/158613.patch" target="_blank">pFad Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/v3index.php?u=https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/158613.patch" target="_blank">pFad v3 Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/v4index.php?u=https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/158613.patch" target="_blank">pFad v4 Proxy</a></p></body>
</html>