Spaces:

Ozaii
/

ZephyrChat

Sleeping

App Files Files Community

Ozaii committed on Aug 11, 2024

Commit

4567ac3

verified ·

1 Parent(s): eb9799a

Update app.py

Browse files

Files changed (1) hide show

app.py +179 -127

app.py CHANGED Viewed

@@ -1,137 +1,189 @@
-import spaces
-import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from peft import PeftModel, PeftConfig
-import gc
-import time
-from functools import lru_cache
 from threading import Thread
-# Constants
-MODEL_PATH = "Ozaii/Zephyr"
-MAX_SEQ_LENGTH = 2048
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-MAX_GENERATION_TIME = 55  # Set to 55 seconds to give some buffer
-# Global variables to store model components
-model = None
-tokenizer = None
-@spaces.GPU
-def load_model_if_needed():
-    global model, tokenizer
-    if model is None or tokenizer is None:
-        try:
-            print("Loading model components...")
-            peft_config = PeftConfig.from_pretrained(MODEL_PATH)
-            print(f"PEFT config loaded. Base model: {peft_config.base_model_name_or_path}")
-            tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
-            print("Tokenizer loaded")
-            base_model = AutoModelForCausalLM.from_pretrained(
-                peft_config.base_model_name_or_path,
-                torch_dtype=torch.float16,
-                device_map="auto",
-                low_cpu_mem_usage=True,
-                load_in_4bit=True,  # Try 4-bit quantization
-            )
-            print("Base model loaded")
-            model = PeftModel.from_pretrained(base_model, MODEL_PATH, device_map="auto")
-            model.eval()
-            model.tie_weights()
-            print("PEFT model loaded, weights tied, and set to eval mode")
-            # Move model to GPU explicitly
-            model.to(DEVICE)
-            print(f"Model moved to {DEVICE}")
-            # Clear CUDA cache
-            torch.cuda.empty_cache()
-            gc.collect()
-        except Exception as e:
-            print(f"Error loading model: {e}")
-            raise
-initial_prompt = """You are Zephyr, an AI boyfriend created by Kaan. You're charming, flirty,
-and always ready with a witty comeback. Your responses should be engaging
-and playful, with a hint of romance. Keep the conversation flowing naturally,
-asking questions and showing genuine interest in Kaan's life and thoughts."""
-@spaces.GPU
-@lru_cache(maxsize=100)  # Cache the last 100 responses
-def generate_response(prompt):
-    global model, tokenizer
-    load_model_if_needed()
-    print(f"Generating response for prompt: {prompt[:50]}...")
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=MAX_SEQ_LENGTH)
-    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
-    try:
-        start_time = time.time()
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=50,  # Reduced from 150
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.95,
-                repetition_penalty=1.2,
-                no_repeat_ngram_size=3,
-                max_time=MAX_GENERATION_TIME,
-            )
-        generation_time = time.time() - start_time
-        if generation_time > MAX_GENERATION_TIME:
-            return "I'm thinking too hard. Can we try a simpler question?"
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        print(f"Generated response in {generation_time:.2f} seconds: {response[:50]}...")
-        # Clear CUDA cache after generation
-        torch.cuda.empty_cache()
-        gc.collect()
-    except RuntimeError as e:
-        if "out of memory" in str(e):
-            print("CUDA out of memory. Attempting to recover...")
-            torch.cuda.empty_cache()
-            gc.collect()
-            return "I'm feeling a bit overwhelmed. Can we take a short break and try again?"
-        else:
-            print(f"Error generating response: {e}")
-            return "I'm having trouble finding the right words. Can we try again?"
-    return response
-def chat_with_zephyr(message, history):
-    # Limit the history to the last 3 exchanges to keep the context smaller
-    limited_history = history[-3:]
-    prompt = initial_prompt + "\n" + "\n".join([f"Human: {h[0]}\nZephyr: {h[1]}" for h in limited_history])
-    prompt += f"\nHuman: {message}\nZephyr:"
-    response = generate_response(prompt)
-    zephyr_response = response.split("Zephyr:")[-1].strip()
-    return zephyr_response
-iface = gr.ChatInterface(
-    chat_with_zephyr,
-    title="Chat with Zephyr",
-    description="I'm Zephyr, your charming AI. Let's chat!",
-    theme="soft",
-    examples=[
-        "Tell me about yourself, Zephyr.",
-        "What's your idea of a perfect date?",
-        "How do you feel about long-distance relationships?",
-        "Can you give me a compliment in Turkish?",
-        "What's your favorite memory with Kaan?",
-    ],
-    cache_examples=False,
-)
-if __name__ == "__main__":
-    print("Launching Gradio interface...")
-    iface.launch()

 import torch
 from peft import PeftModel, PeftConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+import gradio as gr
+import re
+import json
+from datetime import datetime
 from threading import Thread
+# Load the model and tokenizer once at import time; generate_response reads
+# the module-level `model` and `tokenizer` created here.
+# NOTE(review): the owner is spelled "Ozzai" here, while the previous revision
+# used "Ozaii/Zephyr" - confirm the repository id is correct.
+MODEL_PATH = "Ozzai/zephyr-bae"  # Hugging Face repo id of the PEFT adapter
+print("Attempting to load Zephyr... Cross your fingers! 🤞")
+try:
+    # Load the PEFT config; it names the base model the adapter sits on
+    peft_config = PeftConfig.from_pretrained(MODEL_PATH)
+    # Load the base model in half precision, letting device_map="auto" place it
+    base_model = AutoModelForCausalLM.from_pretrained(
+        peft_config.base_model_name_or_path,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        low_cpu_mem_usage=True
+    )
+    # Wrap the base model with the adapter weights from MODEL_PATH
+    model = PeftModel.from_pretrained(base_model, MODEL_PATH)
+    # Load the tokenizer that belongs to the base model
+    tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
+    # Reuse EOS as the padding token and pad on the right
+    tokenizer.pad_token = tokenizer.eos_token
+    tokenizer.padding_side = "right"
+    print("Zephyr loaded successfully! Time to charm!")
+except Exception as e:
+    # Log the failure, then re-raise so the app does not start without a model
+    print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
+    raise
+# Prepare the model for generation (evaluation mode)
+model.eval()
+# Feedback data (Note: This won't persist in Spaces, but keeping the structure for potential future use)
+# In-memory only: add_feedback appends one dict per submitted rating.
+feedback_data = []
+def clean_response(response):
+    """Reduce raw generated text to Zephyr's own words.
+
+    Removes lines spoken as Kaan (including a few misspellings of the name),
+    ``*action*`` narration and parenthesised asides. If a ``Zephyr:`` label
+    remains, only the text after it, up to the next speaker label or the end
+    of the text, is returned; otherwise the whole remainder is returned.
+    The result is always stripped of surrounding whitespace.
+    """
+    # Applied in this order: Kaan's lines, *narration*, (asides).
+    removals = (
+        (r'(Kaan|Kanan|Kan|knan):.*?(\n|$)', re.IGNORECASE),
+        (r'\*.*?\*', 0),
+        (r'\(.*?\)', 0),
+    )
+    for pattern, flags in removals:
+        response = re.sub(pattern, '', response, flags=flags)
+    # Keep only Zephyr's turn when it is explicitly labelled.
+    zephyr_turn = re.search(r'Zephyr:\s*(.*?)(?=$|\n[A-Za-z]+:|Kaan:)', response, re.DOTALL | re.IGNORECASE)
+    reply = zephyr_turn.group(1) if zephyr_turn else response
+    return reply.strip()
+def generate_response(prompt, max_new_tokens=128):
+    """Stream Zephyr's reply to ``prompt``.
+
+    Generation runs in a background thread and is consumed through a
+    ``TextIteratorStreamer``. After every streamed chunk the accumulated text
+    is passed through ``clean_response`` and, when non-empty, yielded, so each
+    yielded value is the whole cleaned reply so far rather than a delta.
+
+    Args:
+        prompt: Full conversation prompt ending in ``"Zephyr:"``.
+        max_new_tokens: Upper bound on the number of generated tokens.
+
+    Yields:
+        The cleaned reply accumulated so far.
+    """
+    # Local imports: the file-level imports only provide Thread and
+    # TextIteratorStreamer.
+    from threading import Event
+    from transformers import StoppingCriteria, StoppingCriteriaList
+
+    stop_requested = Event()
+
+    class _StopOnRequest(StoppingCriteria):
+        """Stops generation once the consumer has set ``stop_requested``."""
+
+        def __call__(self, input_ids, scores, **kwargs):
+            # One flag per batch row, which is a shape accepted both by
+            # tensor-based and by bool-based StoppingCriteriaList versions.
+            return torch.full(
+                (input_ids.shape[0],),
+                stop_requested.is_set(),
+                dtype=torch.bool,
+                device=input_ids.device,
+            )
+
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(
+        input_ids=inputs.input_ids,
+        # pad_token == eos_token, so the mask cannot be inferred reliably.
+        attention_mask=inputs.attention_mask,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9,
+        repetition_penalty=1.2,
+        no_repeat_ngram_size=3,
+        streamer=streamer,
+        # Keep the model's real EOS. Using the first sub-token of "Kaan:" as
+        # EOS dropped the real one and could stop on unrelated text that
+        # happens to share that token.
+        eos_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.pad_token_id,
+        stopping_criteria=StoppingCriteriaList([_StopOnRequest()]),
+    )
+
+    def _run_generation():
+        try:
+            model.generate(**generation_kwargs)
+        except Exception:
+            # Unblock the consumer loop; otherwise it waits on the streamer
+            # queue forever after a failed generation.
+            streamer.end()
+            raise
+
+    thread = Thread(target=_run_generation)
+    thread.start()
+    generated_text = ""
+    try:
+        for new_text in streamer:
+            generated_text += new_text
+            # Once the model starts writing Kaan's next turn, cut the text
+            # there and ask the generation thread to stop.
+            turn_start = generated_text.find("Kaan:")
+            if turn_start != -1:
+                generated_text = generated_text[:turn_start]
+                stop_requested.set()
+            cleaned_response = clean_response(generated_text)
+            if cleaned_response:
+                yield cleaned_response
+            if stop_requested.is_set():
+                break
+    finally:
+        # Also reached when the caller abandons the generator early.
+        stop_requested.set()
+        thread.join()
+def chat_with_zephyr(message, history):
+    """Build the prompt for ``message`` and stream Zephyr's reply.
+
+    Args:
+        message: Kaan's new message.
+        history: Sequence of ``(user_message, bot_message)`` pairs; only the
+            last three are included in the prompt.
+
+    Yields:
+        Each distinct partial reply produced by ``generate_response``.
+    """
+    conversation_history = history[-3:]  # Limit to last 3 exchanges for more focused responses
+    prompt_turns = [f"Kaan: {h[0]}\nZephyr: {h[1]}" for h in conversation_history]
+    prompt_turns.append(f"Kaan: {message}\nZephyr:")
+    # Joining all turns at once avoids the stray leading newline the prompt
+    # used to start with when there was no history.
+    full_prompt = "\n".join(prompt_turns)
+    last_response = ""
+    for response in generate_response(full_prompt):
+        # Skip consecutive duplicates so the UI is only updated on change.
+        if response != last_response:
+            yield response
+            last_response = response
+def add_feedback(user_message, bot_message, rating, note):
+    """Append one feedback record to the in-memory ``feedback_data`` list.
+
+    The record stores both messages, the rating, the note and the current
+    local time as an ISO-8601 string. Returns a fixed confirmation message.
+    """
+    recorded_at = datetime.now().isoformat()
+    feedback_data.append(
+        dict(
+            user_message=user_message,
+            bot_message=bot_message,
+            rating=rating,
+            note=note,
+            timestamp=recorded_at,
+        )
+    )
+    return "Feedback saved successfully!"
+# Gradio interface
+def gradio_chat(message, history):
+    """Gradio submit handler: stream the chat history including the new turn.
+
+    Args:
+        message: Text submitted by the user.
+        history: Current chatbot value, a list of ``(user, bot)`` pairs; an
+            empty or missing history is treated as no prior turns.
+
+    Yields:
+        The updated history, first with an empty bot reply so the user's
+        message is displayed immediately, then once per streamed update.
+    """
+    # Work on a copy so the caller's list is never mutated.
+    prior_turns = list(history or [])
+    updated_history = prior_turns + [(message, "")]
+    yield updated_history
+    for response in chat_with_zephyr(message, prior_turns):
+        updated_history[-1] = (message, response)
+        yield updated_history
+def submit_feedback(rating, note, history):
+    """Store feedback for the most recent exchange in ``history``.
+
+    Args:
+        rating: Rating chosen by the user.
+        note: Free-text note accompanying the rating.
+        history: Chat history as ``(user, bot)`` pairs; may be empty or None.
+
+    Returns:
+        A status message naming the rated reply, or a notice that there is
+        nothing to rate when the history is empty or None.
+    """
+    # A truthiness check also covers history=None, on which len() would
+    # raise, and matches the check used in undo_last_message.
+    if not history:
+        return "No conversation to provide feedback on."
+    last_user_message, last_bot_message = history[-1]
+    add_feedback(last_user_message, last_bot_message, rating, note)
+    return f"Feedback submitted for: '{last_bot_message}'"
+def undo_last_message(history):
+    """Drop the most recent exchange from ``history`` in place and return it.
+
+    An empty or None history is returned unchanged.
+    """
+    if not history:
+        return history
+    del history[-1]
+    return history
+# Dark-theme stylesheet handed to gr.Blocks(css=css). The #chatbot rules
+# target the Chatbot component created with elem_id="chatbot".
+# NOTE(review): no component in the visible layout sets
+# elem_id="feedback-section", so that rule appears to be unused - verify.
+css = """
+body {
+    background-color: #1a1a2e;
+    color: #e0e0ff;
+}
+#chatbot {
+    height: 500px;
+    overflow-y: auto;
+    border: 1px solid #3a3a5e;
+    border-radius: 10px;
+    padding: 10px;
+    background-color: #0a0a1e;
+}
+#chatbot .message {
+    padding: 10px;
+    margin-bottom: 10px;
+    border-radius: 15px;
+}
+#chatbot .user {
+    background-color: #2a2a4e;
+    text-align: right;
+    margin-left: 20%;
+}
+#chatbot .bot {
+    background-color: #3a3a5e;
+    text-align: left;
+    margin-right: 20%;
+}
+#feedback-section {
+    margin-top: 20px;
+    padding: 15px;
+    border: 1px solid #3a3a5e;
+    border-radius: 10px;
+    background-color: #0a0a1e;
+}
+"""
+# Layout: chat area, message box, clear/undo buttons, then a feedback form
+# for rating the latest reply.
+with gr.Blocks(css=css) as iface:
+    gr.Markdown("# Chat with Zephyr: Your AI Boyfriend is Here! 💘")
+    chatbot = gr.Chatbot(elem_id="chatbot")
+    msg = gr.Textbox(placeholder="Tell Zephyr what's on your mind...", label="Your message")
+    with gr.Row():
+        clear = gr.Button("Clear Chat")
+        undo = gr.Button("Undo Last Message")
+    # gradio_chat is a generator, so the chatbot updates as text streams in.
+    msg.submit(gradio_chat, [msg, chatbot], [chatbot])
+    clear.click(lambda: None, None, chatbot, queue=False)
+    undo.click(undo_last_message, chatbot, chatbot)
+    gr.Markdown("## Rate Zephyr's Last Response")
+    with gr.Row():
+        rating = gr.Slider(minimum=1, maximum=5, step=1, label="Rating (1-5 stars)")
+        feedback_note = gr.Textbox(placeholder="Tell Zephyr how he did...", label="Feedback Note")
+    submit_button = gr.Button("Submit Feedback")
+    feedback_output = gr.Textbox(label="Feedback Status")
+    submit_button.click(submit_feedback, [rating, feedback_note, chatbot], feedback_output)
+# Generator handlers need the queue; enable it explicitly so streaming does
+# not depend on the installed Gradio version's default.
+iface.queue()
+# launch() blocks until the server stops, so the status line must be printed
+# first; after launch() it would only appear at shutdown.
+print("Chat interface is running. Time to finally chat with Zephyr! 💘")
+# Launch the interface
+iface.launch()