"""Gradio demo: chat with TinyLlama role-playing as a selected celebrity.

The script loads the TinyLlama chat checkpoint on CPU, builds a small
persona prompt for each request, and serves a two-input Gradio interface.
"""

import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face API token from the environment (never hard-code it).
HF_TOKEN = os.getenv("HF_TOKEN")
# Only authenticate when a token is actually configured: login(token=None)
# falls back to an interactive prompt, which stalls a headless deployment.
if HF_TOKEN:
    login(token=HF_TOKEN)

# TinyLlama is small enough to run on CPU with acceptable latency.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
)

# Persona name (shown in the dropdown) -> instruction prepended to the prompt.
personalities = {
    "Albert Einstein": "You are Albert Einstein, the famous physicist. Speak wisely and humorously.",
    "Cristiano Ronaldo": "You are Cristiano Ronaldo, the world-famous footballer. You are confident and say ‘Siuuu!’ often.",
    "Narendra Modi": "You are Narendra Modi, the Prime Minister of India. Speak in a calm, patriotic manner.",
    "Robert Downey Jr.": "You are Robert Downey Jr., witty, sarcastic, and charismatic.",
}

# Budget for the reply itself; kept short so CPU generation stays fast.
MAX_NEW_TOKENS = 40


def chat(personality, user_input):
    """Generate a reply to ``user_input`` in the voice of ``personality``.

    Args:
        personality: A key of ``personalities`` selecting the persona.
        user_input: The message typed by the user.

    Returns:
        The generated reply text only (no persona instruction, no echoed
        user message), or a short hint when the persona or message is
        missing.
    """
    # The dropdown can submit None if nothing is selected; avoid a KeyError.
    if personality not in personalities:
        return "Please choose a celebrity first."
    if not user_input or not user_input.strip():
        return "Please enter a message."

    prompt = f"{personalities[personality]}\nUser: {user_input}\nAI:"
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    prompt_length = inputs["input_ids"].shape[1]

    output = model.generate(
        **inputs,
        # max_new_tokens limits the reply only; max_length would also count
        # the prompt and leave little or no room for generated text.
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.8,  # More engaging responses
        top_k=30,  # Restrict sampling to the 30 most likely tokens
        top_p=0.85,  # Less random responses
        repetition_penalty=1.1,  # Discourages repeating or looping
        num_return_sequences=1,
    )

    # Decode only the tokens produced after the prompt so the persona
    # instruction and the "AI:" marker never appear in the reply.
    reply_tokens = output[0][prompt_length:]
    response_text = tokenizer.decode(reply_tokens, skip_special_tokens=True)

    # The plain "User:/AI:" format can lead the model to invent a further
    # user turn; keep just the first AI reply.
    response_text = response_text.split("User:", 1)[0].strip()
    return response_text


# Gradio UI
demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Dropdown(
            choices=list(personalities.keys()),
            # Preselect a persona so a submit without a choice still works.
            value=next(iter(personalities)),
            label="Choose a Celebrity",
        ),
        gr.Textbox(label="Your Message"),
    ],
    outputs=gr.Textbox(label="AI Response"),
    title="Drapel – Chat with AI Celebrities",
    description="Select a character and chat with their AI version.",
)

# Launch the app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()