diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index a0540d3a1b2..d1567e60968 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -534,6 +534,11 @@ def factorize(self) -> EncodedGroups:
             list(grouper.full_index.values for grouper in groupers),
             names=tuple(grouper.name for grouper in groupers),
         )
+        if not full_index.is_unique:
+            raise ValueError(
+                "The output index for the GroupBy is non-unique. "
+                "This is a bug in the Grouper provided."
+            )
         # This will be unused when grouping by dask arrays, so skip..
         if not is_chunked_array(_flatcodes):
             # Constructing an index from the product is wrong when there are missing groups
@@ -942,17 +947,29 @@ def _binary_op(self, other, f, reflexive=False):
     def _restore_dim_order(self, stacked):
         raise NotImplementedError
 
-    def _maybe_restore_empty_groups(self, combined):
-        """Our index contained empty groups (e.g., from a resampling or binning). If we
+    def _maybe_reindex(self, combined):
+        """Reindexing is needed in two cases:
+        1. Our index contained empty groups (e.g., from a resampling or binning). If we
         reduced on that dimension, we want to restore the full index.
+
+        2. We use a MultiIndex for multi-variable GroupBy. The MultiIndex
+        stores each level's labels in sorted order, and those sorted labels
+        are what unstacking assigns, so we restore each grouper's own order
+        (``grouper.full_index``) here.
         """
         has_missing_groups = (
             self.encoded.unique_coord.size != self.encoded.full_index.size
         )
         indexers = {}
         for grouper in self.groupers:
-            if has_missing_groups and grouper.name in combined._indexes:
+            index = combined._indexes.get(grouper.name, None)
+            if has_missing_groups and index is not None:
                 indexers[grouper.name] = grouper.full_index
+            elif index is not None and len(self.groupers) > 1:
+                # Guarded above: `index` is None when `combined` lacks this index.
+                full = grouper.full_index
+                if not (isinstance(full, pd.RangeIndex) or index.index.equals(full)):
+                    indexers[grouper.name] = full
         if indexers:
             combined = combined.reindex(**indexers)
         return combined
@@ -1540,7 +1557,7 @@ def _combine(self, applied, shortcut=False):
         if dim not in applied_example.dims:
             combined = combined.assign_coords(self.encoded.coords)
         combined = self._maybe_unstack(combined)
-        combined = self._maybe_restore_empty_groups(combined)
+        combined = self._maybe_reindex(combined)
         return combined
 
     def reduce(
@@ -1696,7 +1713,7 @@ def _combine(self, applied):
         if dim not in applied_example.dims:
             combined = combined.assign_coords(self.encoded.coords)
         combined = self._maybe_unstack(combined)
-        combined = self._maybe_restore_empty_groups(combined)
+        combined = self._maybe_reindex(combined)
         return combined
 
     def reduce(
diff --git a/xarray/groupers.py b/xarray/groupers.py
index 234c9f1398a..383f5e85302 100644
--- a/xarray/groupers.py
+++ b/xarray/groupers.py
@@ -517,7 +517,7 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]:
             counts = grouped.count()
             # This way we generate codes for the final output index: full_index.
             # So for _flox_reduce we avoid one reindex and copy by avoiding
-            # _maybe_restore_empty_groups
+            # _maybe_reindex
             codes = np.repeat(np.arange(len(first_items)), counts)
             return first_items, codes
 
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 89ac567a4d8..eb62600d2a9 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -154,7 +154,7 @@ def test_multi_index_groupby_sum() -> None:
 
 
 @requires_pandas_ge_2_2
-def test_multi_index_propagation():
+def test_multi_index_propagation() -> None:
     # regression test for GH9648
     times = pd.date_range("2023-01-01", periods=4)
     locations = ["A", "B"]
@@ -2289,7 +2289,7 @@ def test_resample_origin(self) -> None:
         times = pd.date_range("2000-01-01T02:03:01", freq="6h", periods=10)
         array = DataArray(np.arange(10), [("time", times)])
 
-        origin = "start"
+        origin: Literal["start"] = "start"
         actual = array.resample(time="24h", origin=origin).mean()
         expected = DataArray(array.to_series().resample("24h", origin=origin).mean())
         assert_identical(expected, actual)
@@ -2694,7 +2694,7 @@ def test_default_flox_method() -> None:
 
 @requires_cftime
 @pytest.mark.filterwarnings("ignore")
-def test_cftime_resample_gh_9108():
+def test_cftime_resample_gh_9108() -> None:
     import cftime
 
     ds = Dataset(
@@ -3044,7 +3044,7 @@ def test_gappy_resample_reductions(reduction):
     assert_identical(expected, actual)
 
 
-def test_groupby_transpose():
+def test_groupby_transpose() -> None:
     # GH5361
     data = xr.DataArray(
         np.random.randn(4, 2),
@@ -3104,7 +3104,7 @@ def test_lazy_grouping(grouper, expect_index):
 
 
 @requires_dask
-def test_lazy_grouping_errors():
+def test_lazy_grouping_errors() -> None:
     import dask.array
 
     data = DataArray(
@@ -3130,7 +3130,7 @@ def test_lazy_grouping_errors():
 
 
 @requires_dask
-def test_lazy_int_bins_error():
+def test_lazy_int_bins_error() -> None:
     import dask.array
 
     with pytest.raises(ValueError, match="Bin edges must be provided"):
@@ -3138,7 +3138,7 @@ def test_lazy_int_bins_error():
             _ = BinGrouper(bins=4).factorize(DataArray(dask.array.arange(3)))
 
 
-def test_time_grouping_seasons_specified():
+def test_time_grouping_seasons_specified() -> None:
     time = xr.date_range("2001-01-01", "2002-01-01", freq="D")
     ds = xr.Dataset({"foo": np.arange(time.size)}, coords={"time": ("time", time)})
     labels = ["DJF", "MAM", "JJA", "SON"]
@@ -3147,7 +3147,36 @@ def test_time_grouping_seasons_specified():
     assert_identical(actual, expected.reindex(season=labels))
 
 
-def test_groupby_multiple_bin_grouper_missing_groups():
+def test_multiple_grouper_unsorted_order() -> None:
+    """Multi-variable GroupBy must keep the user-specified (unsorted) label order."""
+    time = xr.date_range("2001-01-01", "2003-01-01", freq="MS")
+    ds = xr.Dataset({"foo": np.arange(time.size)}, coords={"time": ("time", time)})
+    labels = ["DJF", "MAM", "JJA", "SON"]
+    actual = ds.groupby(
+        {
+            "time.season": UniqueGrouper(labels=labels),
+            "time.year": UniqueGrouper(labels=[2002, 2001]),
+        }
+    ).sum()
+    # Reference result: group with default labels, then reorder explicitly.
+    expected = (
+        ds.groupby({"time.season": UniqueGrouper(), "time.year": UniqueGrouper()})
+        .sum()
+        .reindex(season=labels, year=[2002, 2001])
+    )
+    assert_identical(actual, expected)
+
+    # Every label is its own group here, so summing only reorders `b` along x and y.
+    values = np.random.default_rng(0).random((2, 3, 4))
+    b = xr.DataArray(values, coords={"x": [0, 1], "y": [0, 1, 2]}, dims=["x", "y", "z"])
+    actual2 = b.groupby(
+        x=UniqueGrouper(labels=[1, 0]), y=UniqueGrouper(labels=[2, 0, 1])
+    ).sum()
+    expected2 = b.reindex(x=[1, 0], y=[2, 0, 1]).transpose("z", ...)
+    assert_identical(actual2, expected2)
+
+
+def test_groupby_multiple_bin_grouper_missing_groups() -> None:
     from numpy import nan
 
     ds = xr.Dataset(
@@ -3224,7 +3253,7 @@ def test_shuffle_by(chunks, expected_chunks):
 
 
 @requires_dask
-def test_groupby_dask_eager_load_warnings():
+def test_groupby_dask_eager_load_warnings() -> None:
     ds = xr.Dataset(
         {"foo": (("z"), np.arange(12))},
         coords={"x": ("z", np.arange(12)), "y": ("z", np.arange(12))},

<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>pFad - Phonifier reborn</title>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
</head>
<body>
<h1>Pfad - The Proxy pFad of &#169; 2024 Garber Painting. All rights reserved.</h1>


<!-- Disclaimer -->
<p>Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.</p>
<br>
<p>Alternative Proxies:</p><p><a href="http://clevelandohioweatherforecast.com/php-proxy/index.php?q=https://patch-diff.githubusercontent.com/raw/pydata/xarray/pull/10151.diff" target="_blank">Alternative Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/index.php?u=https://patch-diff.githubusercontent.com/raw/pydata/xarray/pull/10151.diff" target="_blank">pFad Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/v3index.php?u=https://patch-diff.githubusercontent.com/raw/pydata/xarray/pull/10151.diff" target="_blank">pFad v3 Proxy</a></p><p><a href="http://clevelandohioweatherforecast.com/pFad/v4index.php?u=https://patch-diff.githubusercontent.com/raw/pydata/xarray/pull/10151.diff" target="_blank">pFad v4 Proxy</a></p></body>
</html>