import torch
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr
import re
import json
from datetime import datetime
from threading import Thread

# Load the model and tokenizer
MODEL_PATH = "Ozaii/zephyr-bae"

print("Attempting to load Zephyr... Cross your fingers! 🀞")

try:
    # Load the PEFT config
    peft_config = PeftConfig.from_pretrained(MODEL_PATH)
    
    # Load the base model
    base_model = AutoModelForCausalLM.from_pretrained(
        peft_config.base_model_name_or_path,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        device_map="auto",
        trust_remote_code=True  # the base model may ship custom modeling code
    )
    
    # Load the PEFT model
    model = PeftModel.from_pretrained(base_model, MODEL_PATH, is_trainable=False)
    
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    
    print("Zephyr loaded successfully! Time to charm!")
except Exception as e:
    print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
    raise

# Prepare the model for generation
model.eval()
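
# Optional speed-up (sketch, assumes the GPU has headroom for the merged
# weights): folding the LoRA adapter into the base model with PEFT's
# merge_and_unload() removes the adapter indirection during generation.
# Merging is irreversible for this process, so it is left commented out.
# model = model.merge_and_unload()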

# Feedback data (Note: This won't persist in Spaces, but keeping the structure for potential future use)
feedback_data = []

def clean_response(response):
    # Remove any non-Zephyr dialogue or narration
    response = re.sub(r'(Kaan|Kanan|Kan|knan):.*?(\n|$)', '', response, flags=re.IGNORECASE)
    response = re.sub(r'\*.*?\*', '', response)
    response = re.sub(r'\(.*?\)', '', response)

    # Find Zephyr's response
    match = re.search(r'Zephyr:\s*(.*?)(?=$|\n[A-Za-z]+:|Kaan:)', response, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()
    else:
        return response.strip()
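
# Illustrative example (not from a real run):
#   clean_response("Zephyr: hey you *winks* (laughs)\nKaan: hi")
# returns "hey you" - Kaan's turns, *actions*, and (asides) are stripped,
# so only Zephyr's own words survive.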

def generate_response(prompt, max_new_tokens=128):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
    
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    
    generation_kwargs = dict(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        streamer=streamer,
        # Approximate stop: "Kaan:" usually spans several tokens, so this only
        # matches its first token; clean_response() trims any overshoot.
        eos_token_id=tokenizer.encode("Kaan:", add_special_tokens=False)[0]
    )
    
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    
    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        cleaned_response = clean_response(generated_text)
        if cleaned_response:
            yield cleaned_response
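
# Hedged alternative to the single-token eos_token_id stop above: a custom
# StoppingCriteria that halts once "Kaan:" appears in the decoded
# continuation. StopOnSubstring is a sketch, not a transformers class;
# prompt_len must be the token length of the prompt so only new text is
# scanned.
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnSubstring(StoppingCriteria):
    """Stop generation once `stop_string` appears in newly generated text."""
    def __init__(self, stop_string, tokenizer, prompt_len):
        self.stop_string = stop_string
        self.tokenizer = tokenizer
        self.prompt_len = prompt_len

    def __call__(self, input_ids, scores, **kwargs):
        new_text = self.tokenizer.decode(
            input_ids[0][self.prompt_len:], skip_special_tokens=True
        )
        return self.stop_string in new_text

# Usage sketch: inside generate_response, swap eos_token_id for
#   stopping_criteria=StoppingCriteriaList(
#       [StopOnSubstring("Kaan:", tokenizer, inputs.input_ids.shape[1])]
#   )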

def chat_with_zephyr(message, history):
    conversation_history = history[-3:]  # Limit to last 3 exchanges for more focused responses

    full_prompt = "\n".join([f"Kaan: {h[0]}\nZephyr: {h[1]}" for h in conversation_history])
    full_prompt += f"\nKaan: {message}\nZephyr:"

    last_response = ""
    for response in generate_response(full_prompt):
        if response != last_response:
            yield response
            last_response = response
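
# Prompt layout (illustrative): with one prior exchange, the model sees
#   Kaan: <earlier message>
#   Zephyr: <earlier reply>
#   Kaan: <new message>
#   Zephyr:
# and generation continues from the trailing "Zephyr:" tag.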

def add_feedback(user_message, bot_message, rating, note):
    feedback_entry = {
        "user_message": user_message,
        "bot_message": bot_message,
        "rating": rating,
        "note": note,
        "timestamp": datetime.now().isoformat()
    }
    feedback_data.append(feedback_entry)
    return "Feedback saved successfully!"

# Gradio callbacks for the Blocks UI below
def gradio_chat(message, history):
    """Append the new message and stream Zephyr's reply into the history."""
    history = history or []
    history.append([message, ""])
    for response in chat_with_zephyr(message, history[:-1]):
        history[-1][1] = response
        yield history

def undo_last_message(history):
    """Drop the most recent exchange."""
    return history[:-1] if history else history

def submit_feedback(rating, note, history):
    """Rate the last exchange and record it via add_feedback."""
    if not history:
        return "No conversation to rate yet!"
    user_message, bot_message = history[-1]
    return add_feedback(user_message, bot_message, rating, note)


css = """
body {
    background-color: #1a1a2e;
    color: #e0e0ff;
}
#chatbot {
    height: 500px;
    overflow-y: auto;
    border: 1px solid #3a3a5e;
    border-radius: 10px;
    padding: 10px;
    background-color: #0a0a1e;
}
#chatbot .message {
    padding: 10px;
    margin-bottom: 10px;
    border-radius: 15px;
}
#chatbot .user {
    background-color: #2a2a4e;
    text-align: right;
    margin-left: 20%;
}
#chatbot .bot {
    background-color: #3a3a5e;
    text-align: left;
    margin-right: 20%;
}
#feedback-section {
    margin-top: 20px;
    padding: 15px;
    border: 1px solid #3a3a5e;
    border-radius: 10px;
    background-color: #0a0a1e;
}
"""

with gr.Blocks(css=css) as iface:
    gr.Markdown("# Chat with Zephyr: Your AI Boyfriend is Here! πŸ’˜")
    chatbot = gr.Chatbot(elem_id="chatbot")
    msg = gr.Textbox(placeholder="Tell Zephyr what's on your mind...", label="Your message")
    with gr.Row():
        clear = gr.Button("Clear Chat")
        undo = gr.Button("Undo Last Message")

    msg.submit(gradio_chat, [msg, chatbot], [chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)
    undo.click(undo_last_message, chatbot, chatbot)

    gr.Markdown("## Rate Zephyr's Last Response")
    with gr.Row():
        rating = gr.Slider(minimum=1, maximum=5, step=1, label="Rating (1-5 stars)")
        feedback_note = gr.Textbox(placeholder="Tell Zephyr how he did...", label="Feedback Note")
    submit_button = gr.Button("Submit Feedback")
    feedback_output = gr.Textbox(label="Feedback Status")

    submit_button.click(submit_feedback, [rating, feedback_note, chatbot], feedback_output)

# Launch the interface (the queue is required for streaming generator
# callbacks in Gradio 3.x; launch() blocks, so print beforehand)
print("Chat interface is running. Time to finally chat with Zephyr! πŸ’˜")
iface.queue()
iface.launch()