Daemontatox committed
Commit 488a981 · verified · 1 Parent(s): 45c547e

Update app.py

Files changed (1)
  1. app.py +42 -52
app.py CHANGED
@@ -7,46 +7,47 @@ import gradio as gr
 from gradio import FileData
 import time
 import spaces
-
-ckpt = "Daemontatox/DocumentCogito"
+ckpt ="Daemontatox/DocumentCogito"
 model = MllamaForConditionalGeneration.from_pretrained(ckpt,
     torch_dtype=torch.bfloat16).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
 
-SYSTEM_PROMPT = """You are a helpful AI assistant specialized in analyzing documents, images, and visual content.
-Your responses should be clear, accurate, and focused on the specific details present in the provided materials.
-When analyzing documents, pay attention to key information, formatting, and context.
-For images, consider both obvious and subtle details that might be relevant to the user's query."""
 
 @spaces.GPU()
-def bot_streaming(message, history, max_new_tokens=2048, temperature=0.7):
+def bot_streaming(message, history, max_new_tokens=2048):
+
     txt = message["text"]
     ext_buffer = f"{txt}"
 
-    messages = [{"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT}]}]
+    messages= []
     images = []
 
+
     for i, msg in enumerate(history):
         if isinstance(msg[0], tuple):
             messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
             messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
             images.append(Image.open(msg[0][0]).convert("RGB"))
         elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
+            # messages are already handled
             pass
-        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):
+        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str): # text only turn
             messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
             messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
 
+    # add current message
     if len(message["files"]) == 1:
-        if isinstance(message["files"][0], str):
+
+        if isinstance(message["files"][0], str): # examples
             image = Image.open(message["files"][0]).convert("RGB")
-        else:
+        else: # regular input
             image = Image.open(message["files"][0]["path"]).convert("RGB")
         images.append(image)
         messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
     else:
         messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
 
+
     texts = processor.apply_chat_template(messages, add_generation_prompt=True)
 
     if images == []:
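
Note on the history walk above: every {"type": "image"} placeholder appended to messages must be matched, in order, by a PIL image appended to images; apply_chat_template only renders the placeholders, and the processor pairs them with the actual pixels at encode time. A minimal sketch of the expected shapes, using the processor loaded above (the file name and texts are illustrative):

    from PIL import Image

    # One image turn plus a follow-up text-only turn, in the shape
    # bot_streaming builds before templating.
    messages = [
        {"role": "user", "content": [
            {"type": "text", "text": "What is in this picture?"},
            {"type": "image"},  # placeholder only; the pixels travel separately
        ]},
        {"role": "assistant", "content": [{"type": "text", "text": "A cat."}]},
        {"role": "user", "content": [{"type": "text", "text": "Summarize the chat."}]},
    ]
    images = [Image.open("cat.jpg").convert("RGB")]  # same order as the placeholders

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=images, return_tensors="pt")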
@@ -55,13 +56,8 @@ def bot_streaming(message, history, max_new_tokens=2048, temperature=0.7):
     inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
 
-    generation_kwargs = dict(
-        inputs,
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature, # Add temperature parameter
-        do_sample=True, # Enable sampling for temperature to take effect
-    )
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+    generated_text = ""
 
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
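
The generation call is the standard transformers streaming recipe: model.generate runs on a worker thread while TextIteratorStreamer yields decoded chunks back on the caller's thread. A self-contained sketch of the pattern (the helper name and token budget are illustrative):

    from threading import Thread
    from transformers import TextIteratorStreamer

    def stream_reply(model, processor, inputs, max_new_tokens=256):
        # skip_prompt drops the echoed input; skip_special_tokens strips EOS etc.
        streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
        kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
        Thread(target=model.generate, kwargs=kwargs).start()  # generate off-thread
        buffer = ""
        for new_text in streamer:  # blocks until the next decoded chunk arrives
            buffer += new_text
            yield buffer  # the ChatInterface re-renders the partial reply on each yield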
@@ -73,38 +69,32 @@ def bot_streaming(message, history, max_new_tokens=2048, temperature=0.7):
         time.sleep(0.01)
         yield buffer
 
-demo = gr.ChatInterface(
-    fn=bot_streaming,
-    title="Document Analyzer",
-    examples=[
-        [{"text": "Which era does this piece belong to? Give details about the era.", "files":["./examples/rococo.jpg"]}, 200, 0.7],
-        [{"text": "Where do the droughts happen according to this diagram?", "files":["./examples/weather_events.png"]}, 250, 0.7],
-        [{"text": "What happens when you take out white cat from this chain?", "files":["./examples/ai2d_test.jpg"]}, 250, 0.7],
-        [{"text": "How long does it take from invoice date to due date? Be short and concise.", "files":["./examples/invoice.png"]}, 250, 0.7],
-        [{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./examples/wat_arun.jpg"]}, 250, 0.7],
-    ],
-    textbox=gr.MultimodalTextbox(),
-    additional_inputs=[
-        gr.Slider(
-            minimum=10,
-            maximum=500,
-            value=2048,
-            step=10,
-            label="Maximum number of new tokens to generate",
-        ),
-        gr.Slider( # Add temperature slider
-            minimum=0.1,
-            maximum=2.0,
-            value=0.2,
-            step=0.1,
-            label="Temperature (0.1 = focused, 2.0 = creative)",
-        )
-    ],
-    cache_examples=False,
-    description="MllM with Temperature Control",
-    stop_btn="Stop Generation",
-    fill_height=True,
-    multimodal=True
-)
 
+demo = gr.ChatInterface(fn=bot_streaming, title="Document Analyzer", examples=[
+    [{"text": "Which era does this piece belong to? Give details about the era.", "files":["./examples/rococo.jpg"]},
+     200],
+    [{"text": "Where do the droughts happen according to this diagram?", "files":["./examples/weather_events.png"]},
+     250],
+    [{"text": "What happens when you take out white cat from this chain?", "files":["./examples/ai2d_test.jpg"]},
+     250],
+    [{"text": "How long does it take from invoice date to due date? Be short and concise.", "files":["./examples/invoice.png"]},
+     250],
+    [{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./examples/wat_arun.jpg"]},
+     250],
+    ],
+    textbox=gr.MultimodalTextbox(),
+    additional_inputs = [gr.Slider(
+        minimum=10,
+        maximum=500,
+        value=2048,
+        step=10,
+        label="Maximum number of new tokens to generate",
+    )
+    ],
+    cache_examples=False,
+    description="MllM ",
+    stop_btn="Stop Generation",
+    fill_height=True,
+    multimodal=True)
+
 demo.launch(debug=True)
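
If sampling control is wanted back later, the deleted kwargs map directly onto the new one-liner; a sketch reassembled from the removed lines (temperature would again need to be a bot_streaming parameter fed by a slider):

    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,           # sampling must be enabled for temperature to apply
        temperature=temperature,  # e.g. from a gr.Slider(minimum=0.1, maximum=2.0)
    )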
 
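One carry-over worth flagging: in both the old and the new slider, value=2048 lies outside the declared 10-500 range, so the advertised default can never actually be selected. A self-consistent configuration (the bounds here are illustrative) would look like:

    gr.Slider(
        minimum=10,
        maximum=2048,  # raise the ceiling to cover the default...
        value=512,     # ...and/or pick a default inside the range
        step=10,
        label="Maximum number of new tokens to generate",
    )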