Add support for google/pegasus-xsum #1325

Merged: 2 commits merged on Feb 22, 2024 (viewing changes from 1 commit).
Commit: black (a formatting-only pass with the Black code formatter)
levkk committed Feb 22, 2024
commit b9a6eb0d4ca5f194c659ad2f621a038d242818ce
pgml-extension/src/bindings/transformers/transformers.py: 24 changes (15 additions, 9 deletions)
@@ -243,6 +243,7 @@ def __next__(self):
         self.q.task_done()
         return v
 
+
 class StandardPipeline(object):
     def __init__(self, model_name, **kwargs):
         # the default pipeline constructor doesn't pass all the kwargs (particularly load_in_4bit)
@@ -283,17 +284,25 @@ def __init__(self, model_name, **kwargs):
         elif self.task == "summarization" or self.task == "translation":
             if model_name == "google/pegasus-xsum":
                 # HF auto model doesn't detect GPUs
-                self.model = PegasusForConditionalGeneration.from_pretrained(model_name)
+                self.model = PegasusForConditionalGeneration.from_pretrained(
+                    model_name
+                )
             else:
-                self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name, **kwargs)
+                self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                    model_name, **kwargs
+                )
         elif self.task == "text-generation" or self.task == "conversational":
             # See: https://huggingface.co/docs/transformers/main/quantization
             if "quantization_config" in kwargs:
                 quantization_config = kwargs.pop("quantization_config")
                 quantization_config = GPTQConfig(**quantization_config)
-                self.model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, **kwargs)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name, quantization_config=quantization_config, **kwargs
+                )
             else:
-                self.model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name, **kwargs
+                )
         else:
             raise PgMLException(f"Unhandled task: {self.task}")
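
For context on the special case in the hunk above: constructing PegasusForConditionalGeneration directly leaves device placement to the caller. A minimal sketch of explicit loading and manual GPU placement follows; the placement and generation code is illustrative, not taken from this diff.

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

model_name = "google/pegasus-xsum"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

# Move the model to a GPU by hand, since the plain constructor does not.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

batch = tokenizer(
    ["PostgresML brings machine learning to Postgres."],
    truncation=True, padding="longest", return_tensors="pt",
).to(device)
summary_ids = model.generate(**batch)
print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))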

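The quantization branch pops a plain dict out of kwargs and rebuilds it as a GPTQConfig before loading. A standalone sketch of that pattern; the checkpoint name and config values are placeholders, not from the diff.

from transformers import AutoModelForCausalLM, GPTQConfig

kwargs = {
    "quantization_config": {"bits": 4},  # arrives as a plain dict
    "device_map": "auto",
}

# Same transformation as in the diff: pop the dict, wrap it in a GPTQConfig.
quantization_config = GPTQConfig(**kwargs.pop("quantization_config"))
model = AutoModelForCausalLM.from_pretrained(
    "example-org/example-gptq-model",  # hypothetical GPTQ-quantized checkpoint
    quantization_config=quantization_config,
    **kwargs,
)
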
@@ -341,7 +350,7 @@ def stream(self, input, timeout=None, **kwargs):
                 self.tokenizer,
                 timeout=timeout,
                 skip_prompt=True,
-                skip_special_tokens=True
+                skip_special_tokens=True,
             )
             if "chat_template" in kwargs:
                 input = self.tokenizer.apply_chat_template(
@@ -364,9 +373,7 @@ def stream(self, input, timeout=None, **kwargs):
                 )
         else:
             streamer = TextIteratorStreamer(
-                self.tokenizer,
-                timeout=timeout,
-                skip_special_tokens=True
+                self.tokenizer, timeout=timeout, skip_special_tokens=True
             )
             input = self.tokenizer(input, return_tensors="pt", padding=True).to(
                 self.model.device
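
The streamers constructed above are consumed elsewhere; the usual pattern with TextIteratorStreamer is to run generate() in a background thread while the caller iterates the streamer for decoded text chunks. A minimal sketch using a small placeholder model rather than anything from this PR:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "gpt2"  # placeholder model for illustration
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

streamer = TextIteratorStreamer(tokenizer, timeout=30.0, skip_special_tokens=True)
inputs = tokenizer("PostgresML is", return_tensors="pt")

# generate() blocks, so it runs in a thread while we drain the streamer.
thread = Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 20},
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
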
@@ -517,7 +524,6 @@ def embed(transformer, inputs, kwargs):
     return embed_using(model, transformer, inputs, kwargs)
 
 
-
 def clear_gpu_cache(memory_usage: None):
     if not torch.cuda.is_available():
         raise PgMLException(f"No GPU available")
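
The body of clear_gpu_cache is not shown in this hunk. For reference, the standard PyTorch pattern for releasing cached GPU memory looks like the sketch below; this is an assumption about the implementation, not a copy of it.

import torch

def clear_gpu_cache_sketch():
    if not torch.cuda.is_available():
        raise RuntimeError("No GPU available")
    # Release cached, currently unused blocks back to the CUDA driver.
    torch.cuda.empty_cache()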