cyrus28214 committed
Commit 12c4ba7 · unverified · 1 Parent(s): 4745ba0
Files changed (1): app.py (+5 -36)
app.py CHANGED

@@ -5,7 +5,7 @@ from threading import Thread
 import torch
 import spaces

-MODEL_ID = "csfufu/Revisual-R1-final"
+MODEL_ID = "TIGER-Lab/VL-Rethinker-7B"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID,
@@ -14,42 +14,15 @@ model = AutoModelForImageTextToText.from_pretrained(
 ).to("cuda").eval()

 @spaces.GPU
-def inference(input_dict, history):
+def respond(input_dict, chat_history):
     text = input_dict["text"]
     files = input_dict["files"]

-    """
-    Create chat history
-    Example history value:
-    [
-        [('pixel.png',), None],
-        ['ignore this image. just say "hi" and nothing else', 'Hi!'],
-        ['just say "hi" and nothing else', 'Hi!']
-    ]
-    """
     all_images = []
     current_message_images = []
     messages = []

-    for val in history:
-        if val[0]:
-            if isinstance(val[0], str):
-                messages.append({
-                    "role": "user",
-                    "content": [
-                        *[{"type": "image", "image": image} for image in current_message_images],
-                        {"type": "text", "text": val[0]},
-                    ],
-                })
-                current_message_images = []
-
-            else:
-                # Load messages. These will be appended to the first user text message that comes after
-                current_message_images = [load_image(image) for image in val[0]]
-                all_images += current_message_images
-
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    messages.append(chat_history)

     current_message_images = [load_image(image) for image in files]
     all_images += current_message_images
@@ -61,11 +34,6 @@ def inference(input_dict, history):
         ],
     })

-    #print(messages)
-
-    """
-    Generate and stream text
-    """
     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(
         text=[prompt],
@@ -86,7 +54,8 @@ def inference(input_dict, history):
     yield buffer

 demo = gr.ChatInterface(
-    fn=inference,
+    fn=respond,
+    type='messages',
     multimodal=True,
 )
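
For context: the tuple-parsing loop could be deleted because the new type='messages' setting makes Gradio deliver chat_history as a list of OpenAI-style role/content dicts rather than [user, bot] pairs. A minimal sketch of that shape, with illustrative values that are not from the commit (the committed handler folds the list in with a single append; the flattened form below is the structure apply_chat_template consumes):

# Sketch: history as delivered by gr.ChatInterface(type='messages').
# Values are illustrative, not taken from the commit.
chat_history = [
    {"role": "user", "content": 'just say "hi" and nothing else'},
    {"role": "assistant", "content": "Hi!"},
]

messages = []
messages.extend(chat_history)  # one dict per prior turn
messages.append({
    "role": "user",
    "content": [{"type": "text", "text": "now answer in French"}],
})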
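
The generation code between the last two hunks is unchanged, so the diff truncates it; the "from threading import Thread" context and the "yield buffer" loop suggest the usual TextIteratorStreamer pattern. A hedged sketch of that pattern only, assuming transformers' TextIteratorStreamer, a processor that exposes a .tokenizer attribute, and an invented max_new_tokens value:

# Hypothetical sketch of the (unchanged, truncated) streaming tail of
# respond(); not the committed code.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, processor, inputs, max_new_tokens=1024):
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run model.generate() on a worker thread so decoded text can be
    # consumed from the streamer as it is produced.
    gen_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    Thread(target=model.generate, kwargs=gen_kwargs).start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # ChatInterface re-renders the growing reply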