import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Define available models
model_options = {
"VLM-1-K3": "VortexIntelligence/VLM-1-K3",
"VLM-1-K2": "VortexIntelligence/VLM-1-K2",
"VLM-1-K1": "VortexIntelligence/VLM-1-K1",
}
# Load models and tokenizers
models = {}
tokenizers = {}
for name, model_id in model_options.items():
print(f"Loading {name}...")
tokenizers[name] = AutoTokenizer.from_pretrained(model_id)
models[name] = AutoModelForCausalLM.from_pretrained(model_id)
print(f"{name} loaded successfully!")
def generate_response(message, history, model_choice):
    # Note: the conversation history is not used; each message is answered independently.
    tokenizer = tokenizers[model_choice]
    model = models[model_choice]
    input_ids = tokenizer(message, return_tensors="pt").input_ids
    input_ids = input_ids[:, -1024:]  # Truncate to the last 1024 tokens if needed
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, excluding the prompt
    new_tokens = output[0][input_ids.shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()
# Create the Gradio interface
with gr.Blocks(theme="soft") as demo:
    model_choice = gr.Dropdown(choices=list(model_options.keys()), label="Select Model", value="VLM-1-K3")
    chatbot = gr.ChatInterface(
        fn=generate_response,
        additional_inputs=[model_choice],  # pass the current dropdown selection as the third argument
        examples=[
            ["Hello, who are you?", "VLM-1-K3"],
            ["What can you do?", "VLM-1-K3"],
            ["Tell me a short story", "VLM-1-K3"],
        ],
    )
if __name__ == "__main__":
demo.launch() |
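# The dependencies are not listed in this file; a minimal requirements.txt for
# running the app locally would likely contain the following (version pins are
# an assumption, not taken from the repository):
#   gradio
#   transformers
#   torch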