import torch
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr
import re
import json
from datetime import datetime
from threading import Thread

# Load the model and tokenizer
MODEL_PATH = "Ozaii/zephyr-bae"  # Your Hugging Face model path

print("Attempting to load Zephyr... Cross your fingers! 🤞")

try:
    # Load the PEFT config
    peft_config = PeftConfig.from_pretrained(MODEL_PATH)

    # Load the base model
    base_model = AutoModelForCausalLM.from_pretrained(
        peft_config.base_model_name_or_path,
        torch_dtype=torch.float16,
        device_map="auto",
        low_cpu_mem_usage=True
    )

    # Load the PEFT adapter on top of the base model
    model = PeftModel.from_pretrained(base_model, MODEL_PATH)

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    print("Zephyr loaded successfully! Time to charm!")
except Exception as e:
    print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
    raise

# Put the model in evaluation mode for generation
model.eval()

# Feedback data (note: this is in-memory only and won't persist across
# restarts on Spaces, but the structure is kept for potential future use)
feedback_data = []

def clean_response(response):
    # Strip any non-Zephyr dialogue or narration from the raw generation
    response = re.sub(r'(Kaan|Kanan|Kan|knan):.*?(\n|$)', '', response, flags=re.IGNORECASE)
    response = re.sub(r'\*.*?\*', '', response)  # stage directions, e.g. *smiles*
    response = re.sub(r'\(.*?\)', '', response)  # parenthetical asides

    # Extract Zephyr's reply if the model echoed the speaker tag
    match = re.search(r'Zephyr:\s*(.*?)(?=$|\n[A-Za-z]+:|Kaan:)', response, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()
    return response.strip()

def generate_response(prompt, max_new_tokens=128):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        streamer=streamer,
        # Rough stopping heuristic: treat the first token of "Kaan:" as EOS so
        # generation halts when the model starts speaking for the user
        eos_token_id=tokenizer.encode("Kaan:", add_special_tokens=False)[0]
    )

    # Run generation in a background thread so tokens can be streamed as they arrive
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        cleaned_response = clean_response(generated_text)
        if cleaned_response:
            yield cleaned_response

def chat_with_zephyr(message, history):
    conversation_history = history[-3:]  # Limit to the last 3 exchanges for more focused responses
    full_prompt = "\n".join([f"Kaan: {h[0]}\nZephyr: {h[1]}" for h in conversation_history])
    full_prompt += f"\nKaan: {message}\nZephyr:"

    last_response = ""
    for response in generate_response(full_prompt):
        if response != last_response:
            yield response
            last_response = response

def add_feedback(user_message, bot_message, rating, note):
    feedback_entry = {
        "user_message": user_message,
        "bot_message": bot_message,
        "rating": rating,
        "note": note,
        "timestamp": datetime.now().isoformat()
    }
    feedback_data.append(feedback_entry)
    return "Feedback saved successfully!"
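# Optional sketch (not part of the original app): since feedback_data lives in
# memory only, one simple way to keep entries across restarts is to flush them
# to a local JSON file using the already-imported json module. FEEDBACK_FILE
# and save_feedback_to_disk are illustrative names, not existing API; note that
# on Hugging Face Spaces the local disk is itself ephemeral, so truly durable
# storage would need an external backend (e.g. a dataset repo).
FEEDBACK_FILE = "feedback_log.json"  # hypothetical path, adjust as needed

def save_feedback_to_disk(entries, path=FEEDBACK_FILE):
    # Write the accumulated feedback entries as a JSON array
    with open(path, "w", encoding="utf-8") as f:
        json.dump(entries, f, ensure_ascii=False, indent=2)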
# Gradio interface
def gradio_chat(message, history):
    history.append((message, ""))
    for response in chat_with_zephyr(message, history[:-1]):
        history[-1] = (message, response)
        yield history

def submit_feedback(rating, note, history):
    if len(history) > 0:
        last_user_message, last_bot_message = history[-1]
        add_feedback(last_user_message, last_bot_message, rating, note)
        return f"Feedback submitted for: '{last_bot_message}'"
    return "No conversation to provide feedback on."

def undo_last_message(history):
    if history:
        history.pop()
    return history

css = """
body { background-color: #1a1a2e; color: #e0e0ff; }
#chatbot {
    height: 500px;
    overflow-y: auto;
    border: 1px solid #3a3a5e;
    border-radius: 10px;
    padding: 10px;
    background-color: #0a0a1e;
}
#chatbot .message { padding: 10px; margin-bottom: 10px; border-radius: 15px; }
#chatbot .user { background-color: #2a2a4e; text-align: right; margin-left: 20%; }
#chatbot .bot { background-color: #3a3a5e; text-align: left; margin-right: 20%; }
#feedback-section {
    margin-top: 20px;
    padding: 15px;
    border: 1px solid #3a3a5e;
    border-radius: 10px;
    background-color: #0a0a1e;
}
"""

with gr.Blocks(css=css) as iface:
    gr.Markdown("# Chat with Zephyr: Your AI Boyfriend is Here! 💘")
    chatbot = gr.Chatbot(elem_id="chatbot")
    msg = gr.Textbox(placeholder="Tell Zephyr what's on your mind...", label="Your message")
    with gr.Row():
        clear = gr.Button("Clear Chat")
        undo = gr.Button("Undo Last Message")

    msg.submit(gradio_chat, [msg, chatbot], [chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)
    undo.click(undo_last_message, chatbot, chatbot)

    gr.Markdown("## Rate Zephyr's Last Response")
    with gr.Row():
        rating = gr.Slider(minimum=1, maximum=5, step=1, label="Rating (1-5 stars)")
        feedback_note = gr.Textbox(placeholder="Tell Zephyr how he did...", label="Feedback Note")
    submit_button = gr.Button("Submit Feedback")
    feedback_output = gr.Textbox(label="Feedback Status")

    submit_button.click(submit_feedback, [rating, feedback_note, chatbot], feedback_output)

# Launch the interface; queue() is required for the streaming (generator)
# handlers, and launch() blocks, so print the status message first
print("Chat interface is running. Time to finally chat with Zephyr! 💘")
iface.queue()
iface.launch()