Spaces:

Emova-ollm
/

RACRO-demo

Running on Zero

App Files Community

KaiChen1998 committed on Jun 16

Commit

9350a8c

1 Parent(s): e1b6b95

convert to HF backend

Browse files

Files changed (1) hide show

app.py +8 -10

app.py CHANGED Viewed

@@ -13,7 +13,9 @@ auth_token = os.environ.get("TOKEN_FROM_SECRET")
 ##########################################
 # LLM part
 ##########################################
-from transformers import AutoProcessor, AutoTokenizer, TextIteratorStreamer
 from vllm import LLM, SamplingParams
 from qwen_vl_utils import process_vision_info
 from threading import Thread
@@ -32,14 +34,11 @@ LLM_MODEL_PATH = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
 processor = AutoProcessor.from_pretrained(MLLM_MODEL_PATH)
 tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_PATH)
-mllm = LLM(model=MLLM_MODEL_PATH, gpu_memory_utilization=0.8,
-           device='cuda:0', dtype="bfloat16", limit_mm_per_prompt={"image": 1})
-llm = LLM(model=LLM_MODEL_PATH, tensor_parallel_size=1, gpu_memory_utilization=0.8,
-          device='cuda:0', dtype="bfloat16")
-mllm_sampling = SamplingParams(temperature=0, max_tokens=8192)
-llm_sampling = SamplingParams(temperature=0.6, top_p=0.95, max_tokens=8192)
 # === Build Prompts ===
 def build_messages(image_path, question):
@@ -171,8 +170,7 @@ title_markdown = ("""
     <h1 style="margin: 0;">RACRO: Perceptual Decoupling for Scalable Multi-modal Reasoning via Reward-Optimized Captioning</h1>
     <h2 style="margin: 10px 0;">📃 <a href="https://www.arxiv.org/abs/2506.04559" style="font-weight: 400;">Paper</a> | 💻 <a href="https://github.com/gyhdog99/RACRO2" style="font-weight: 400;">Code</a> | 🤗 <a href="https://huggingface.co/collections/KaiChen1998/racro-6848ec8c65b3a0bf33d0fbdb" style="font-weight: 400;">HuggingFace</a></h2>
     <p  style="margin: 20px 0;">
-      <strong>1. RACRO is designed for multi-modal reasoning, and thus, image inputs are <mark>ALWAYS</mark> necessary!</strong><br/>
-      <strong>2. Models are deployed with vLLM, which unfortunately, still does not support streaming outputs for MLLMs.</strong>
     </p>
   </div>
 </div>

 ##########################################
 # LLM part
 ##########################################
+import torch
+from transformers import AutoProcessor, AutoTokenizer
+from transformers import Qwen2ForCausalLM, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
 from vllm import LLM, SamplingParams
 from qwen_vl_utils import process_vision_info
 from threading import Thread
 processor = AutoProcessor.from_pretrained(MLLM_MODEL_PATH)
 tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_PATH)
+mllm = Qwen2_5_VLForConditionalGeneration.from_pretrained(MLLM_MODEL_PATH, torch_dtype=torch.bfloat16, device_map="auto")
+llm = Qwen2ForCausalLM.from_pretrained(LLM_MODEL_PATH, torch_dtype=torch.bfloat16, device_map="auto")
+mllm_sampling = dict(temperature=0, max_tokens=8192)
+llm_sampling = dict(temperature=0.6, top_p=0.95, max_tokens=8192)
 # === Build Prompts ===
 def build_messages(image_path, question):
     <h1 style="margin: 0;">RACRO: Perceptual Decoupling for Scalable Multi-modal Reasoning via Reward-Optimized Captioning</h1>
     <h2 style="margin: 10px 0;">📃 <a href="https://www.arxiv.org/abs/2506.04559" style="font-weight: 400;">Paper</a> | 💻 <a href="https://github.com/gyhdog99/RACRO2" style="font-weight: 400;">Code</a> | 🤗 <a href="https://huggingface.co/collections/KaiChen1998/racro-6848ec8c65b3a0bf33d0fbdb" style="font-weight: 400;">HuggingFace</a></h2>
     <p  style="margin: 20px 0;">
+      <strong>1. RACRO is designed for multi-modal reasoning, and thus, image inputs are <mark>ALWAYS</mark> necessary!</strong>
     </p>
   </div>
 </div>