diff --git a/pgml-extension/src/bindings/transformers/transformers.py b/pgml-extension/src/bindings/transformers/transformers.py
index fadde8858..9390cac44 100644
--- a/pgml-extension/src/bindings/transformers/transformers.py
+++ b/pgml-extension/src/bindings/transformers/transformers.py
@@ -41,7 +41,9 @@
     PegasusTokenizer,
     TrainingArguments,
     Trainer,
-    GPTQConfig
+    GPTQConfig,
+    PegasusForConditionalGeneration,
+    PegasusTokenizer,
 )
 
 import threading
@@ -254,6 +256,8 @@ def __init__(self, model_name, **kwargs):
         if "use_auth_token" in kwargs:
             kwargs["token"] = kwargs.pop("use_auth_token")
 
+        self.model_name = model_name
+
         if (
             "task" in kwargs
             and model_name is not None
@@ -278,29 +282,55 @@ def __init__(self, model_name, **kwargs):
                     model_name, **kwargs
                 )
             elif self.task == "summarization" or self.task == "translation":
-                self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name, **kwargs)
+                if model_name == "google/pegasus-xsum":
+                    # HF auto model doesn't detect GPUs
+                    self.model = PegasusForConditionalGeneration.from_pretrained(
+                        model_name
+                    )
+                else:
+                    self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                        model_name, **kwargs
+                    )
             elif self.task == "text-generation" or self.task == "conversational":
                 # See: https://huggingface.co/docs/transformers/main/quantization
                 if "quantization_config" in kwargs:
                     quantization_config = kwargs.pop("quantization_config")
                     quantization_config = GPTQConfig(**quantization_config)
-                    self.model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, **kwargs)
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        model_name, quantization_config=quantization_config, **kwargs
+                    )
                 else:
-                    self.model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        model_name, **kwargs
+                    )
             else:
                 raise PgMLException(f"Unhandled task: {self.task}")
 
+            if model_name == "google/pegasus-xsum":
+                kwargs.pop("token", None)
+
             if "token" in kwargs:
                 self.tokenizer = AutoTokenizer.from_pretrained(
                     model_name, token=kwargs["token"]
                 )
             else:
-                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                if model_name == "google/pegasus-xsum":
+                    self.tokenizer = PegasusTokenizer.from_pretrained(model_name)
+                else:
+                    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+            pipe_kwargs = {
+                "model": self.model,
+                "tokenizer": self.tokenizer,
+            }
+
+            # https://huggingface.co/docs/transformers/en/model_doc/pegasus
+            if model_name == "google/pegasus-xsum":
+                pipe_kwargs["device"] = kwargs.get("device", "cpu")
 
             self.pipe = transformers.pipeline(
                 self.task,
-                model=self.model,
-                tokenizer=self.tokenizer,
+                **pipe_kwargs,
             )
         else:
             self.pipe = transformers.pipeline(**kwargs)
@@ -320,7 +350,7 @@ def stream(self, input, timeout=None, **kwargs):
                 self.tokenizer,
                 timeout=timeout,
                 skip_prompt=True,
-                skip_special_tokens=True
+                skip_special_tokens=True,
             )
             if "chat_template" in kwargs:
                 input = self.tokenizer.apply_chat_template(
@@ -343,9 +373,7 @@ def stream(self, input, timeout=None, **kwargs):
                 )
             else:
                 streamer = TextIteratorStreamer(
-                    self.tokenizer,
-                    timeout=timeout,
-                    skip_special_tokens=True
+                    self.tokenizer, timeout=timeout, skip_special_tokens=True
                 )
                 input = self.tokenizer(input, return_tensors="pt", padding=True).to(
                     self.model.device
@@ -496,7 +524,6 @@ def embed(transformer, inputs, kwargs):
     return embed_using(model, transformer, inputs, kwargs)
 
 
-
 def clear_gpu_cache(memory_usage: None):
     if not torch.cuda.is_available():
         raise PgMLException(f"No GPU available")
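
For context on the google/pegasus-xsum branch added above, here is a minimal standalone sketch of the same loading pattern, assuming a plain Python environment with transformers and sentencepiece installed and network access to download the weights. The script, the sample input, and the "cpu" device value are illustrative assumptions, not part of the extension code.

# Hypothetical sketch: load google/pegasus-xsum with the explicit Pegasus
# classes (as the branch above does, rather than the Auto* classes) and pass
# the device to the pipeline through a kwargs dict.
import transformers
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

model_name = "google/pegasus-xsum"
model = PegasusForConditionalGeneration.from_pretrained(model_name)
tokenizer = PegasusTokenizer.from_pretrained(model_name)

pipe_kwargs = {
    "model": model,
    "tokenizer": tokenizer,
    "device": "cpu",  # assumption: swap for "cuda:0" when a GPU is available
}
pipe = transformers.pipeline("summarization", **pipe_kwargs)

# Illustrative input; the generated summary depends on the model.
print(pipe("PostgresML runs machine learning models inside Postgres.")[0]["summary_text"])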