import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Define available models (display name -> Hugging Face Hub repo id)
model_options = {
    "VLM-1-K3": "VortexIntelligence/VLM-1-K3",
    "VLM-1-K2": "VortexIntelligence/VLM-1-K2",
    "VLM-1-K1": "VortexIntelligence/VLM-1-K1",
}
# Load models and tokenizers
models = {}
tokenizers = {}
for name, model_id in model_options.items():
    print(f"Loading {name}...")
    tokenizers[name] = AutoTokenizer.from_pretrained(model_id)
    models[name] = AutoModelForCausalLM.from_pretrained(model_id)
    print(f"{name} loaded successfully!")
def generate_response(message, history, model_choice):
    # Note: history is accepted to match the ChatInterface signature, but the
    # model is prompted with only the latest message (each turn is stateless).
    tokenizer = tokenizers[model_choice]
    model = models[model_choice]
    inputs = tokenizer(message, return_tensors="pt")
    # Truncate to the last 1024 tokens if needed, keeping the attention mask in sync
    input_ids = inputs.input_ids[:, -1024:]
    attention_mask = inputs.attention_mask[:, -1024:]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt
    new_tokens = output[0][input_ids.shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()
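
# Quick sanity check (hypothetical, runs outside the Gradio app): with an empty
# history, the handler should return a short completion from the default model.
# print(generate_response("Hello, who are you?", [], "VLM-1-K3"))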
# Create the Gradio interface
with gr.Blocks(theme="soft") as demo:
    model_choice = gr.Dropdown(choices=list(model_options.keys()), label="Select Model", value="VLM-1-K3")
    # Wire the dropdown into the chat handler via additional_inputs so the
    # current selection is passed with every message (reading model_choice.value
    # in a lambda would capture only the initial value, never later changes).
    gr.ChatInterface(
        generate_response,
        additional_inputs=[model_choice],
        examples=[
            ["Hello, who are you?", "VLM-1-K3"],
            ["What can you do?", "VLM-1-K3"],
            ["Tell me a short story", "VLM-1-K3"],
        ],
    )
if __name__ == "__main__":
    demo.launch()
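    # When running locally, demo.launch(share=True) would also create a
    # temporary public link; on Hugging Face Spaces, plain launch() is enough.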