Spaces:
Runtime error
import gradio as gr
from transformers import pipeline
import torch
import os

# Configure cache to avoid space limitations
os.environ['HF_HOME'] = '/tmp/cache'

# Use a reliable LLM hosted by Hugging Face
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
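# Note: Mistral's instruct models are tuned on the [INST] ... [/INST] chat
# template; a plain "User:/Assistant:" prompt still works but may degrade
# quality (see the chat-template sketch at the bottom of this file).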
# Load the model pipeline
generator = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    max_new_tokens=560
)
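# Note: a 7B model in bfloat16 needs roughly 14 GB for the weights alone, so
# this load will typically fail or be extremely slow on the free CPU Spaces
# tier; a GPU Space is the practical option. device_map="auto" also requires
# the `accelerate` package to be listed in requirements.txt.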
def generate_chat_completion(message, history, max_tokens=560, temperature=0.8):
    """Generate an assistant response from the latest user message and chat history."""
    try:
        # gr.ChatInterface passes the new message plus the prior history as a
        # list of (user, assistant) pairs; flatten it into a role-prefixed prompt.
        lines = []
        for user_msg, assistant_msg in history:
            lines.append(f"User: {user_msg}")
            lines.append(f"Assistant: {assistant_msg}")
        lines.append(f"User: {message}")
        prompt = "\n".join(lines) + "\nAssistant:"
        output = generator(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            repetition_penalty=1.15,
            do_sample=True
        )
        # The pipeline echoes the prompt; return only the newly generated text.
        return output[0]["generated_text"][len(prompt):].strip()
    except Exception as e:
        return f"[Error] {e}"
# Gradio Chat Interface
chat_interface = gr.ChatInterface(
    fn=generate_chat_completion,
    title="Mistral-7B Chat",
    description="Powered by Hugging Face Transformers",
    # Note: the retry_btn / undo_btn / clear_btn keyword arguments were removed
    # in recent Gradio releases; the default retry/undo/clear controls appear
    # automatically, so they are omitted here.
)
if __name__ == "__main__":
    chat_interface.launch()
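
# --- Optional: chat-template-based prompting (a sketch, not wired in) ---
# Recent transformers releases let a text-generation pipeline accept a list of
# {"role", "content"} messages directly and apply the model's own chat template
# ([INST] ... [/INST] for Mistral). A minimal sketch, assuming a recent
# transformers version; swap it into ChatInterface in place of
# generate_chat_completion if the plain-text prompt gives poor results:
#
#     def generate_with_chat_template(message, history):
#         messages = []
#         for user_msg, assistant_msg in history:
#             messages.append({"role": "user", "content": user_msg})
#             messages.append({"role": "assistant", "content": assistant_msg})
#         messages.append({"role": "user", "content": message})
#         output = generator(messages, max_new_tokens=560, do_sample=True)
#         # With chat-style input, generated_text is the full message list;
#         # the last entry holds the new assistant reply.
#         return output[0]["generated_text"][-1]["content"]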