Spaces:
Sleeping
Sleeping
File size: 3,312 Bytes
a6f31c1 f950045 a6f31c1 d214ccd 696e8bc a6f31c1 12da0b8 696e8bc f950045 4567ac3 f902328 696e8bc 4567ac3 52e8dc3 4567ac3 d214ccd 4567ac3 d214ccd 4567ac3 d214ccd 4567ac3 d214ccd 4567ac3 d214ccd 4567ac3 d214ccd 4567ac3 d214ccd 4567ac3 0e6dca2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import torch
from unsloth import FastLanguageModel
from peft import PeftConfig
from transformers import AutoTokenizer, TextIteratorStreamer
from threading import Thread
import gradio as gr
import spaces
# Identifier of the fine-tuned PEFT adapter that load_model() reads.
MODEL_PATH = "Ozaii/zephyr-bae"
# Base checkpoint handed to FastLanguageModel.from_pretrained in load_model().
BASE_MODEL = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
# Maximum sequence length passed to Unsloth when the base model is loaded.
max_seq_length = 2048
print("Attempting to load Zephyr... Cross your fingers! π€")
@spaces.GPU
def load_model():
    """Load the 4-bit base model and attach the fine-tuned Zephyr adapter.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been passed through
        ``FastLanguageModel.for_inference``.

    Raises:
        Exception: Any loading failure is reported on stdout and re-raised
            unchanged so the caller still sees the original error.
    """
    # Local import keeps this block self-contained; ``peft`` is already a
    # dependency of this file.
    from peft import PeftModel

    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=BASE_MODEL,
            max_seq_length=max_seq_length,
            dtype=None,  # Auto-detect
            load_in_4bit=True,
        )
        # Attach the *trained* adapter weights stored at MODEL_PATH.
        # FastLanguageModel.get_peft_model() is meant for creating a new
        # LoRA adapter to train, so it would not load the fine-tuned weights.
        model = PeftModel.from_pretrained(
            model,
            MODEL_PATH,
            adapter_name="default",
        )
        FastLanguageModel.for_inference(model)
        print("Zephyr loaded successfully! Time to charm!")
        return model, tokenizer
    except Exception as e:
        print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
        raise
# Load once at import time; generate_response() reads these module-level names.
model, tokenizer = load_model()
@spaces.GPU
def generate_response(prompt, max_new_tokens=128):
    """Start generating a continuation of ``prompt`` and return its stream.

    Generation runs on a background thread so the caller can iterate over
    the returned streamer while tokens are still being produced.

    Args:
        prompt: Full text prompt to continue.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        A ``TextIteratorStreamer`` yielding decoded text chunks, with the
        prompt and special tokens skipped.
    """
    # NOTE(review): the worker thread outlives this function call. Confirm
    # that the @spaces.GPU allocation stays valid while the caller is still
    # consuming the streamer.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,  # same limit the model was loaded with
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=inputs["input_ids"],
        # Forward the mask explicitly so generate() does not have to guess it.
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        streamer=streamer,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    return streamer
def chat_with_zephyr(message, history):
    """Stream Zephyr's reply to ``message``.

    The prompt is a "Human:/Zephyr:" transcript built from the last three
    entries of ``history`` followed by the new message. Each yielded value
    is the whole reply accumulated so far, not just the newest chunk.
    """
    recent_turns = history[-3:]  # Limit to last 3 exchanges
    transcript = "\n".join(
        f"Human: {turn[0]}\nZephyr: {turn[1]}" for turn in recent_turns
    )
    prompt = f"{transcript}\nHuman: {message}\nZephyr:"
    reply_so_far = ""
    for chunk in generate_response(prompt):
        reply_so_far = reply_so_far + chunk
        yield reply_so_far
# Dark colour scheme applied to the page body and the Gradio container.
css = """
body {
background-color: #1a1a2e;
color: #e0e0ff;
}
.gradio-container {
background-color: #1a1a2e;
}
"""


def _stream_chat_history(message, history):
    """Adapt chat_with_zephyr's text stream to a Chatbot-compatible value.

    chat_with_zephyr yields the reply accumulated so far as a plain string,
    while the Chatbot output needs the whole conversation. Each yielded value
    is therefore the previous history plus one ``[user, bot]`` pair, the same
    pair layout chat_with_zephyr reads via ``h[0]`` / ``h[1]``.
    """
    history = history or []  # tolerate an empty/unset Chatbot value
    for partial_reply in chat_with_zephyr(message, history):
        yield history + [[message, partial_reply]]


with gr.Blocks(css=css) as iface:
    gr.Markdown("# Chat with Zephyr: Your AI Boyfriend π")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Tell Zephyr what's on your mind...", label="Your message")
    clear = gr.Button("Clear Chat")
    # Stream the growing conversation into the Chatbot on submit.
    msg.submit(_stream_chat_history, [msg, chatbot], [chatbot])
    # Returning None resets the Chatbot; no queueing needed for an instant action.
    clear.click(lambda: None, None, chatbot, queue=False)
    gr.Markdown("""
## Welcome to Zephyr, Your AI Boyfriend!
Zephyr is here to charm you with his wit, humor, and cosmic energy. Feel free to flirt, ask for advice, or just chat about anything under the stars!
**Some conversation starters:**
- "Hey Zephyr, how's the cosmic energy today?"
- "What's your idea of a perfect date in the digital realm?"
- "Tell me something that would make me fall for you even more!"
Remember, Zephyr is an AI and this is for fun and entertainment. Enjoy your chat! π
""")
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()