# NOTE: the original paste began with "Spaces: / Running / Running" — a
# Hugging Face Spaces page-status header captured by copy/paste, not code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline
class HuggingFaceAgent:
    """Thin wrapper around a Hugging Face causal-LM text-generation pipeline.

    Loads a tokenizer and model once at construction, then serves prompt
    completions via :meth:`responder`.
    """

    def __init__(self, model_id, access_token):
        """Download/load the model and build the generation pipeline.

        Args:
            model_id: Hub repository id of the causal LM (e.g. "meta-llama/...").
            access_token: Hugging Face access token for gated/private models.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=access_token,
            # fp16 halves memory on GPU; fall back to fp32 on CPU, where
            # half-precision kernels are slow or unsupported.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            # NOTE(review): device_map="auto" requires the `accelerate`
            # package — confirm it is a declared dependency.
            device_map="auto",
        )
        self.pipeline = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)

    def responder(self, prompt):
        """Generate a completion for *prompt* and return only the new text.

        Args:
            prompt: Input text to complete.

        Returns:
            The generated continuation, whitespace-stripped.
        """
        # return_full_text=False tells the pipeline itself to drop the prompt
        # from the output. The previous `respuesta[len(prompt):]` slice was
        # fragile: if the tokenizer's decode of the prompt differs from the
        # raw input string (normalization, special tokens), the slice cuts
        # at the wrong offset and corrupts the returned text.
        salida = self.pipeline(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            return_full_text=False,
        )
        return salida[0]["generated_text"].strip()