Euryeth committed
Commit beb9a26 · verified · 1 Parent(s): c60c816

Update app.py

Files changed (1)
app.py +26 -42
app.py CHANGED
@@ -3,53 +3,37 @@ from transformers import pipeline
 import torch
 import os
 
-# Configure cache to avoid space limitations
-os.environ['HF_HOME'] = '/tmp/cache'
-
-# Use a reliable LLM hosted by Hugging Face
-MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
+# Use safe float32 for CPU compatibility
+torch_dtype = torch.float32
+
+# Configure cache directory
+os.environ['HF_HOME'] = '/tmp/cache'
 
 # Load the model pipeline
 generator = pipeline(
     "text-generation",
-    model=MODEL_NAME,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
-    max_new_tokens=560
+    model="mistralai/Mistral-7B-Instruct-v0.2",
+    device=0 if torch.cuda.is_available() else -1,
+    torch_dtype=torch_dtype
 )
 
-def generate_chat_completion(message_history, max_tokens=560, temperature=0.8):
-    """Generate assistant response from chat message history"""
-    try:
-        # If using Gradio chat format (list of tuples), convert to role-content dicts
-        messages = [{"role": "user", "content": msg} if i % 2 == 0 else {"role": "assistant", "content": msg}
-                    for i, msg in enumerate(message_history)]
-
-        prompt = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in messages])
-        prompt += "\nAssistant:"
-
-        output = generator(
-            prompt,
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            top_p=0.95,
-            repetition_penalty=1.15,
-            do_sample=True
-        )
-        response = output[0]['generated_text'].replace(prompt, "").strip()
-        return message_history + [response]
-    except Exception as e:
-        return message_history + [f"[Error] {str(e)}"]
-
-# Gradio Chat Interface
-chat_interface = gr.ChatInterface(
-    fn=generate_chat_completion,
-    title="Mistral-7B Chat",
-    description="Powered by Hugging Face Transformers",
-    retry_btn="Retry",
-    undo_btn="Undo",
-    clear_btn="Clear"
-)
+def generate_chat_completion(message, history):
+    """Basic chatbot for Gradio interface"""
+    prompt = f"User: {message}\nAssistant:"
+    output = generator(
+        prompt,
+        max_new_tokens=512,
+        temperature=0.8,
+        top_p=0.95,
+        repetition_penalty=1.15,
+        do_sample=True
+    )
+    response = output[0]['generated_text'].replace(prompt, "").strip()
+    return response
 
-if __name__ == "__main__":
-    chat_interface.launch()
+gr.ChatInterface(fn=generate_chat_completion,
+                 title="Mistral Chatbot",
+                 description="Chat with Mistral-7B",
+                 retry_btn="Retry",
+                 undo_btn="Undo",
+                 clear_btn="Clear").launch()
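
Note: gr.ChatInterface calls its fn with the latest user message and the running chat history, which is what the new (message, history) signature matches. A minimal sketch of an offline check of the updated callback (hypothetical snippet; it assumes the generator pipeline above has loaded and that `import gradio as gr` sits above this hunk in app.py):

    # Simulate one turn of the call Gradio's ChatInterface makes:
    # fn(message, history) -> assistant reply string.
    history = []  # prior turns; unused by this implementation
    reply = generate_chat_completion("What is the capital of France?", history)
    print(reply)  # only the text generated after the prompt, since the prompt is stripped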