import os

# Set the Hugging Face cache location *before* importing transformers /
# huggingface_hub, since the cache path is resolved at import time
os.environ["HF_HOME"] = "/tmp/cache"

import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Authenticate with the Hugging Face token from the environment
login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))

# Pick a dtype that works on both CPU and GPU
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

# Load model and tokenizer (Cerebras BTLM-3B-8K chat model).
# BTLM is a custom architecture, so trust_remote_code is required.
model_name = "cerebras/btlm-3b-8k-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch_dtype,
    device_map="auto",
    trust_remote_code=True,
)

# Create the text-generation pipeline. The model is already placed and cast
# by from_pretrained above, so device_map/torch_dtype are not repeated here.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,  # BTLM defines no pad token; reuse EOS
)
def generate_chat_completion(message: str, history: list = None):
    """
    Generate an assistant reply to `message`.

    `history`, if provided, is a list of {'role': str, 'content': str} dicts;
    it is folded back into the prompt, and the updated history (including the
    new user and assistant turns) is returned.
    """
    history = history or []

    # Rebuild the full conversation as a plain-text prompt
    prompt = ""
    for msg in history:
        prompt += f"{msg['role'].capitalize()}: {msg['content']}\n"
    prompt += f"User: {message}\nAssistant:"

    output = generator(
        prompt,
        max_new_tokens=256,
        temperature=0.7,  # slightly lower temperature for more coherent replies
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
        return_full_text=False,  # return only the newly generated text
    )
    reply = output[0]["generated_text"].strip()

    # If the model keeps going and starts the next "User:" turn, cut it off
    reply = reply.split("\nUser:")[0].strip()

    # Append the new interaction to the history
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return history
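
# Minimal usage sketch (illustrative, not part of the original Space): assumes
# HUGGINGFACEHUB_API_TOKEN is set and the model weights can be downloaded.
if __name__ == "__main__":
    history = generate_chat_completion("What is the BTLM-3B-8K model?")
    history = generate_chat_completion("How long is its context window?", history)
    for turn in history:
        print(f"{turn['role'].capitalize()}: {turn['content']}")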