import os

# Configure the Hugging Face cache before loading the model,
# to avoid space limitations on the default cache location
os.environ["HF_HOME"] = "/tmp/cache"

import gradio as gr
import torch
from transformers import pipeline

# Use a reliable instruction-tuned LLM hosted on Hugging Face
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the text-generation pipeline once at startup
generator = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    max_new_tokens=560,
)


def generate_chat_completion(message, history, max_tokens=560, temperature=0.8):
    """Generate an assistant response from the latest user message and the chat history.

    gr.ChatInterface calls this as fn(message, history), where history is a
    list of (user, assistant) tuples, and expects the response string back.
    """
    try:
        # Convert Gradio's tuple-based history into role/content dicts
        messages = []
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        # Build a simple plain-text prompt from the conversation
        prompt = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
        prompt += "\nAssistant:"

        output = generator(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            repetition_penalty=1.15,
            do_sample=True,
            return_full_text=False,  # return only the newly generated text, not the prompt
        )
        return output[0]["generated_text"].strip()
    except Exception as e:
        return f"[Error] {e}"


# Gradio chat interface; the retry/undo/clear button labels apply to Gradio 4.x
# (these keyword arguments were removed in Gradio 5)
chat_interface = gr.ChatInterface(
    fn=generate_chat_completion,
    title="Mistral-7B Chat",
    description="Powered by Hugging Face Transformers",
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
)

if __name__ == "__main__":
    chat_interface.launch()
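
# --- Optional alternative: using the model's built-in chat template ---
# Mistral-Instruct models ship a chat template, so instead of hand-joining
# "Role: content" lines you can let the tokenizer format the prompt. This is
# a minimal sketch, not wired into the app above; it assumes a transformers
# version that provides tokenizer.apply_chat_template.
#
# from transformers import AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# prompt = tokenizer.apply_chat_template(
#     messages,                    # the same role/content dicts built above
#     tokenize=False,              # return a string rather than token IDs
#     add_generation_prompt=True,  # append the assistant-turn marker
# )
# output = generator(prompt, max_new_tokens=560, return_full_text=False)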