Add support for google/pegasus-xsum #1325

Merged: 2 commits merged on Feb 22, 2024 (viewing changes from 1 commit).
Commit: black (a formatting-only pass with the Black code formatter)
levkk committed Feb 22, 2024
commit b9a6eb0d4ca5f194c659ad2f621a038d242818ce
pgml-extension/src/bindings/transformers/transformers.py: 24 changes (15 additions, 9 deletions)
@@ -243,6 +243,7 @@ def __next__(self):
         self.q.task_done()
         return v
 
+
 class StandardPipeline(object):
     def __init__(self, model_name, **kwargs):
         # the default pipeline constructor doesn't pass all the kwargs (particularly load_in_4bit)
@@ -283,17 +284,25 @@ def __init__(self, model_name, **kwargs):
         elif self.task == "summarization" or self.task == "translation":
             if model_name == "google/pegasus-xsum":
                 # HF auto model doesn't detect GPUs
-                self.model = PegasusForConditionalGeneration.from_pretrained(model_name)
+                self.model = PegasusForConditionalGeneration.from_pretrained(
+                    model_name
+                )
             else:
-                self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name, **kwargs)
+                self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                    model_name, **kwargs
+                )
         elif self.task == "text-generation" or self.task == "conversational":
             # See: https://huggingface.co/docs/transformers/main/quantization
             if "quantization_config" in kwargs:
                 quantization_config = kwargs.pop("quantization_config")
                 quantization_config = GPTQConfig(**quantization_config)
-                self.model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, **kwargs)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name, quantization_config=quantization_config, **kwargs
+                )
             else:
-                self.model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name, **kwargs
+                )
         else:
             raise PgMLException(f"Unhandled task: {self.task}")
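
For context on the special case in the hunk above: constructing PegasusForConditionalGeneration directly leaves device placement to the caller. A minimal sketch of explicit loading and manual GPU placement follows; the placement and generation code is illustrative, not taken from this diff.

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

model_name = "google/pegasus-xsum"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

# Move the model to a GPU by hand, since the plain constructor does not.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

batch = tokenizer(
    ["PostgresML brings machine learning to Postgres."],
    truncation=True, padding="longest", return_tensors="pt",
).to(device)
summary_ids = model.generate(**batch)
print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))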

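The quantization branch pops a plain dict out of kwargs and rebuilds it as a GPTQConfig before loading. A standalone sketch of that pattern; the checkpoint name and config values are placeholders, not from the diff.

from transformers import AutoModelForCausalLM, GPTQConfig

kwargs = {
    "quantization_config": {"bits": 4},  # arrives as a plain dict
    "device_map": "auto",
}

# Same transformation as in the diff: pop the dict, wrap it in a GPTQConfig.
quantization_config = GPTQConfig(**kwargs.pop("quantization_config"))
model = AutoModelForCausalLM.from_pretrained(
    "example-org/example-gptq-model",  # hypothetical GPTQ-quantized checkpoint
    quantization_config=quantization_config,
    **kwargs,
)
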
@@ -341,7 +350,7 @@ def stream(self, input, timeout=None, **kwargs):
                 self.tokenizer,
                 timeout=timeout,
                 skip_prompt=True,
-                skip_special_tokens=True
+                skip_special_tokens=True,
             )
             if "chat_template" in kwargs:
                 input = self.tokenizer.apply_chat_template(
@@ -364,9 +373,7 @@ def stream(self, input, timeout=None, **kwargs):
                 )
         else:
             streamer = TextIteratorStreamer(
-                self.tokenizer,
-                timeout=timeout,
-                skip_special_tokens=True
+                self.tokenizer, timeout=timeout, skip_special_tokens=True
             )
             input = self.tokenizer(input, return_tensors="pt", padding=True).to(
                 self.model.device
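
The streamers constructed above are consumed elsewhere; the usual pattern with TextIteratorStreamer is to run generate() in a background thread while the caller iterates the streamer for decoded text chunks. A minimal sketch using a small placeholder model rather than anything from this PR:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "gpt2"  # placeholder model for illustration
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

streamer = TextIteratorStreamer(tokenizer, timeout=30.0, skip_special_tokens=True)
inputs = tokenizer("PostgresML is", return_tensors="pt")

# generate() blocks, so it runs in a thread while we drain the streamer.
thread = Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 20},
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
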
@@ -517,7 +524,6 @@ def embed(transformer, inputs, kwargs):
     return embed_using(model, transformer, inputs, kwargs)
 
 
-
 def clear_gpu_cache(memory_usage: None):
     if not torch.cuda.is_available():
         raise PgMLException(f"No GPU available")
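
The body of clear_gpu_cache is not shown in this hunk. For reference, the standard PyTorch pattern for releasing cached GPU memory looks like the sketch below; this is an assumption about the implementation, not a copy of it.

import torch

def clear_gpu_cache_sketch():
    if not torch.cuda.is_available():
        raise RuntimeError("No GPU available")
    # Release cached, currently unused blocks back to the CUDA driver.
    torch.cuda.empty_cache()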