Update app.py
app.py
CHANGED
```diff
@@ -2,18 +2,27 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-#
-
-
-
-
-
+# Define available models
+model_options = {
+    "VLM-1-K1": "PingVortex/VLM-1-K1",
+    "VLM-1-K2": "PingVortex/VLM-1-K2",
+    "VLM-1-K3": "PingVortex/VLM-1-K3"
+}
 
-def generate_response(message, history):
+# Load models and tokenizers
+models = {}
+tokenizers = {}
+for name, model_id in model_options.items():
+    print(f"Loading {name}...")
+    tokenizers[name] = AutoTokenizer.from_pretrained(model_id)
+    models[name] = AutoModelForCausalLM.from_pretrained(model_id)
+    print(f"{name} loaded successfully!")
+
+def generate_response(message, history, model_choice):
+    tokenizer = tokenizers[model_choice]
+    model = models[model_choice]
     input_ids = tokenizer(message, return_tensors="pt").input_ids
-    # Truncate to last 1024 tokens if needed
-    input_ids = input_ids[:, -1024:]
-
+    input_ids = input_ids[:, -1024:]  # Truncate to last 1024 tokens if needed
     with torch.no_grad():
         output = model.generate(
             input_ids,
@@ -23,18 +32,19 @@ def generate_response(message, history):
             top_p=0.9,
             pad_token_id=tokenizer.eos_token_id
         )
-
     new_tokens = output[0][input_ids.shape[1]:]
     response = tokenizer.decode(new_tokens, skip_special_tokens=True)
-
     return response.strip()
 
 # Create the Gradio interface
-
-
-
-
-
+with gr.Blocks() as demo:
+    model_choice = gr.Dropdown(choices=list(model_options.keys()), label="Select Model", value="VLM-1-K1")
+    chatbot = gr.ChatInterface(
+        lambda message, history: generate_response(message, history, model_choice.value),
+        theme="soft",
+        examples=["Hello, who are you?", "What can you do?", "Tell me a short story"],
+    )
+    model_choice.change(fn=lambda x: None, inputs=model_choice, outputs=[])
 
 if __name__ == "__main__":
     demo.launch()
```
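One caveat on the new wiring: a Gradio component's `.value` attribute holds its construction-time default, not the live selection, so the lambda passed to `gr.ChatInterface` calls `generate_response` with `"VLM-1-K1"` regardless of what the user later picks, and the trailing `model_choice.change(fn=lambda x: None, ...)` listener is a no-op. Below is a minimal sketch of one alternative, routing the dropdown's live value into the handler through `ChatInterface`'s `additional_inputs` parameter. The generation arguments that fall between the two hunks (new lines 29–31) are not visible in this diff, so `max_new_tokens` and `do_sample` here are assumptions, not the Space's actual settings.

```python
# Sketch only: assumes the generation arguments hidden between the hunks;
# max_new_tokens and do_sample below are placeholders, while top_p and
# pad_token_id are taken from the diff.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_options = {
    "VLM-1-K1": "PingVortex/VLM-1-K1",
    "VLM-1-K2": "PingVortex/VLM-1-K2",
    "VLM-1-K3": "PingVortex/VLM-1-K3",
}
tokenizers = {n: AutoTokenizer.from_pretrained(m) for n, m in model_options.items()}
models = {n: AutoModelForCausalLM.from_pretrained(m) for n, m in model_options.items()}

def generate_response(message, history, model_name):
    tokenizer = tokenizers[model_name]
    model = models[model_name]
    input_ids = tokenizer(message, return_tensors="pt").input_ids
    input_ids = input_ids[:, -1024:]  # keep only the last 1024 prompt tokens
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=256,  # assumed; not shown in the diff
            do_sample=True,      # assumed; needed for top_p to take effect
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    new_tokens = output[0][input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

with gr.Blocks() as demo:
    model_choice = gr.Dropdown(
        choices=list(model_options.keys()),
        label="Select Model",
        value="VLM-1-K1",
    )
    # additional_inputs forwards the dropdown's current value on every
    # submit; the extra argument arrives after (message, history).
    gr.ChatInterface(
        fn=generate_response,
        additional_inputs=[model_choice],
    )

if __name__ == "__main__":
    demo.launch()
```

With this wiring the no-op `.change()` listener can be dropped, since the dropdown state is read at call time. Note also that `generate_response` accepts `history` but never uses it, so each reply is generated from the latest message alone.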