mrfakename committed (verified)
Commit 1ed1b9e · 1 Parent(s): baa5da7

Update app.py

Files changed (1):
  1. app.py (+108 -66)
app.py CHANGED
@@ -1,70 +1,112 @@
- from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
- from qwen_vl_utils import process_vision_info
- import spaces
  import gradio as gr
- # Default: Load the model on the available device(s)
- model = Qwen2VLForConditionalGeneration.from_pretrained(
-     "OS-Copilot/OS-Atlas-Base-7B", torch_dtype="auto", device_map="auto"
- )
- processor = AutoProcessor.from_pretrained("OS-Copilot/OS-Atlas-Base-7B")
- @spaces.GPU
- def run(image, message):
-     messages = [
-         {
-             "role": "user",
-             "content": [
-                 {
-                     "type": "image",
-                     "image": image,
-                 },
-                 {"type": "text", "text": message},
-             ],
-         }
-     ]
-
-
-     # Preparation for inference
-     text = processor.apply_chat_template(
-         messages, tokenize=False, add_generation_prompt=True
-     )
-     image_inputs, video_inputs = process_vision_info(messages)
-     inputs = processor(
-         text=[text],
-         images=image_inputs,
-         videos=video_inputs,
-         padding=True,
-         return_tensors="pt",
-     )
-     inputs = inputs.to("cuda")
-
-     # Inference: Generation of the output
-     generated_ids = model.generate(**inputs, max_new_tokens=128)
-
-     generated_ids_trimmed = [
-         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-     ]

-     output_text = processor.batch_decode(
-         generated_ids_trimmed, skip_special_tokens=False, clean_up_tokenization_spaces=False
      )
-     return output_text
-     # <|object_ref_start|>language switch<|object_ref_end|><|box_start|>(576,12),(592,42)<|box_end|><|im_end|>

- with gr.Blocks() as demo:
-     gr.Markdown("# Unofficial OS-Atlas demo")
-     image = gr.Image(label="Image", type="filepath")
-     text = gr.Textbox(label="Prompt")
-     btn = gr.Button("Generate", variant="primary")
-     output = gr.Textbox(interactive=False)
-     btn.click(run, inputs=[image, text], outputs=output)
-     examples = gr.Examples([
-         [
-             'examples/web_6f93090a-81f6-489e-bb35-1a2838b18c01.png',
-             'In this UI screenshot, what is the position of the element corresponding to the command "switch language of current page" (with bbox)?'
-         ],
-         [
-             'examples/web_dfacd48d-d2c2-492f-b94c-41e6a34ea99f.png',
-             'In this UI screenshot, what is the position of the top button (with bbox)?'
-         ]
-     ], inputs=[image, text])
- demo.queue().launch()
  import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+ from threading import Thread
+ import spaces
+
+ class ChatInterface:
+     def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"):
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             torch_dtype=torch.float16,
+             device_map="auto"
+         )
+
+     def format_chat_prompt(self, message, history, system_message):
+         messages = [{"role": "system", "content": system_message}]
+
+         for user_msg, assistant_msg in history:
+             if user_msg:
+                 messages.append({"role": "user", "content": user_msg})
+             if assistant_msg:
+                 messages.append({"role": "assistant", "content": assistant_msg})
+
+         messages.append({"role": "user", "content": message})
+
+         # Format messages according to model's expected chat template
+         prompt = self.tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+         return prompt
+
+     @spaces.GPU
+     def generate_response(
+         self,
+         message,
+         history,
+         system_message,
+         max_tokens,
+         temperature,
+         top_p,
+     ):
+         prompt = self.format_chat_prompt(message, history, system_message)
+         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+
+         # Setup streamer
+         streamer = TextIteratorStreamer(
+             self.tokenizer,
+             timeout=10.0,
+             skip_prompt=True,
+             skip_special_tokens=True
+         )
+
+         # Generate in a separate thread to enable streaming
+         generation_kwargs = dict(
+             **inputs,  # unpack input_ids/attention_mask; passing inputs=inputs would hand generate() the whole BatchEncoding
+             streamer=streamer,
+             max_new_tokens=max_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             do_sample=True,
+         )
+
+         thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
+         thread.start()
+
+         # Stream the response
+         response = ""
+         for new_text in streamer:
+             response += new_text
+             yield response
+
+ def create_demo():
+     chat_interface = ChatInterface()

+     demo = gr.ChatInterface(
+         chat_interface.generate_response,
+         additional_inputs=[
+             gr.Textbox(
+                 value="You are a friendly Chatbot.",
+                 label="System message"
+             ),
+             gr.Slider(
+                 minimum=1,
+                 maximum=2048,
+                 value=512,
+                 step=1,
+                 label="Max new tokens"
+             ),
+             gr.Slider(
+                 minimum=0.1,
+                 maximum=4.0,
+                 value=0.7,
+                 step=0.1,
+                 label="Temperature"
+             ),
+             gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.95,
+                 step=0.05,
+                 label="Top-p (nucleus sampling)"
+             ),
+         ],
      )
+     return demo

+ if __name__ == "__main__":
+     demo = create_demo()
+     demo.launch()
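
Note: the new app.py streams tokens by running model.generate in a background thread and reading decoded text from a TextIteratorStreamer. Below is a minimal standalone sketch of that pattern, not part of the commit; it assumes the same DeepSeek-R1-Distill-Qwen-1.5B checkpoint and a local torch/transformers install, and trims the generation options for brevity.

# Standalone sketch (illustrative only) of the threaded streaming pattern used above.
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

# Build a prompt with the model's chat template, as format_chat_prompt does.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# generate() runs in a background thread and pushes decoded text into the
# streamer, so the caller can consume partial responses as they arrive.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=128)).start()

for chunk in streamer:
    print(chunk, end="", flush=True)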