import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Define available models (display name -> Hugging Face Hub repo id)
model_options = {
    "VLM-1-K3": "VortexIntelligence/VLM-1-K3",
    "VLM-1-K2": "VortexIntelligence/VLM-1-K2",
    "VLM-1-K1": "VortexIntelligence/VLM-1-K1",
}
# Load models and tokenizers
models = {}
tokenizers = {}
for name, model_id in model_options.items():
    print(f"Loading {name}...")
    tokenizers[name] = AutoTokenizer.from_pretrained(model_id)
    models[name] = AutoModelForCausalLM.from_pretrained(model_id)
    print(f"{name} loaded successfully!")
def generate_response(message, history, model_choice):
    # Note: history is accepted to match the ChatInterface signature, but the
    # model is prompted with only the latest message (each turn is stateless).
    tokenizer = tokenizers[model_choice]
    model = models[model_choice]
    inputs = tokenizer(message, return_tensors="pt")
    # Truncate to the last 1024 tokens if needed, keeping the attention mask in sync
    input_ids = inputs.input_ids[:, -1024:]
    attention_mask = inputs.attention_mask[:, -1024:]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt
    new_tokens = output[0][input_ids.shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()
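
# Quick sanity check (hypothetical, runs outside the Gradio app): with an empty
# history, the handler should return a short completion from the default model.
# print(generate_response("Hello, who are you?", [], "VLM-1-K3"))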
# Create the Gradio interface
with gr.Blocks(theme="soft") as demo:
    model_choice = gr.Dropdown(choices=list(model_options.keys()), label="Select Model", value="VLM-1-K3")
    # Wire the dropdown into the chat handler via additional_inputs so the
    # current selection is passed with every message (reading model_choice.value
    # in a lambda would capture only the initial value, never later changes).
    gr.ChatInterface(
        generate_response,
        additional_inputs=[model_choice],
        examples=[
            ["Hello, who are you?", "VLM-1-K3"],
            ["What can you do?", "VLM-1-K3"],
            ["Tell me a short story", "VLM-1-K3"],
        ],
    )
if __name__ == "__main__":
    demo.launch()
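    # When running locally, demo.launch(share=True) would also create a
    # temporary public link; on Hugging Face Spaces, plain launch() is enough.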