Souvik3333 committed on
Commit
717f406
·
verified ·
1 Parent(s): 3d8d5ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -42
app.py CHANGED
@@ -3,7 +3,7 @@ from PIL import Image
3
  from transformers import AutoTokenizer, AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
4
  import torch
5
  import spaces
6
- import threading
7
 
8
  model_path = "nanonets/Nanonets-OCR-s"
9
 
@@ -62,40 +62,37 @@ def ocr_image_gradio_stream(image, max_tokens=4096):
62
 
63
  # Set up streaming
64
  streamer = TextIteratorStreamer(
65
- tokenizer=tokenizer,
66
- skip_prompt=True,
67
- skip_special_tokens=True,
68
- clean_up_tokenization_spaces=True
69
  )
70
 
71
  generation_kwargs = {
72
  **inputs,
73
  "max_new_tokens": max_tokens,
74
  "do_sample": False,
75
- "streamer": streamer,
76
  }
77
 
78
  # Start generation in a separate thread
79
- generation_thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
80
- generation_thread.start()
81
 
82
- # Stream the output
83
- partial_output = ""
84
- for new_token in streamer:
85
- partial_output += new_token
86
- processed_output = process_tags(partial_output)
87
- yield processed_output
88
 
89
- # Ensure thread completes
90
- generation_thread.join()
91
-
92
  except Exception as e:
93
  yield f"Error processing image: {str(e)}"
94
 
95
- # Non-streaming version as fallback
96
  @spaces.GPU()
97
  def ocr_image_gradio(image, max_tokens=4096):
98
- """Process image through Nanonets OCR model for Gradio interface"""
99
  if image is None:
100
  return "Please upload an image."
101
 
@@ -148,9 +145,6 @@ with gr.Blocks(title="Nanonets OCR Demo") as demo:
148
  💻 GitHub Repository
149
  </a>
150
  </div>
151
- <p style="font-size: 0.9em; color: #10b981; font-weight: 500;">
152
- ✨ Now with streaming output and support for 4 concurrent uploads!
153
- </p>
154
  </div>
155
  """)
156
 
@@ -171,16 +165,9 @@ with gr.Blocks(title="Nanonets OCR Demo") as demo:
171
  )
172
  extract_btn = gr.Button("Extract Text", variant="primary", size="lg")
173
 
174
- gr.Markdown("""
175
- **💡 Tips:**
176
- - Upload supports concurrent processing of up to 4 images
177
- - Results stream in real-time as they're generated
178
- - Automatic processing starts when you upload an image
179
- """)
180
-
181
  with gr.Column(scale=2):
182
  output_text = gr.Markdown(
183
- label="Streaming model prediction",
184
  latex_delimiters=[
185
  {"left": "$$", "right": "$$", "display": True},
186
  {"left": "$", "right": "$", "display": False},
@@ -194,7 +181,7 @@ with gr.Blocks(title="Nanonets OCR Demo") as demo:
194
  show_copy_button=True,
195
  )
196
 
197
- # Event handlers with streaming
198
  extract_btn.click(
199
  fn=ocr_image_gradio_stream,
200
  inputs=[image_input, max_tokens_slider],
@@ -240,14 +227,4 @@ for downstream processing by Large Language Models (LLMs).
240
  """)
241
 
242
  if __name__ == "__main__":
243
- # Configure for concurrent processing with streaming support
244
- demo.queue(
245
- max_size=1000, # Maximum queue size
246
- default_concurrency_limit=4, # Allow 4 concurrent requests
247
- status_update_rate=0.1, # Update status every 100ms for better streaming experience
248
- ).launch(
249
- server_name="0.0.0.0",
250
- server_port=7860,
251
- show_error=True,
252
- share=False
253
- )
 
3
  from transformers import AutoTokenizer, AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
4
  import torch
5
  import spaces
6
+ from threading import Thread
7
 
8
  model_path = "nanonets/Nanonets-OCR-s"
9
 
 
62
 
63
  # Set up streaming
64
  streamer = TextIteratorStreamer(
65
+ tokenizer,
66
+ timeout=60.0,
67
+ skip_prompt=True,
68
+ skip_special_tokens=True
69
  )
70
 
71
  generation_kwargs = {
72
  **inputs,
73
  "max_new_tokens": max_tokens,
74
  "do_sample": False,
75
+ "streamer": streamer
76
  }
77
 
78
  # Start generation in a separate thread
79
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
80
+ thread.start()
81
 
82
+ # Stream the results
83
+ generated_text = ""
84
+ for new_text in streamer:
85
+ generated_text += new_text
86
+ processed_text = process_tags(generated_text)
87
+ yield processed_text
88
 
 
 
 
89
  except Exception as e:
90
  yield f"Error processing image: {str(e)}"
91
 
92
+ # Keep the original function for non-streaming use if needed
93
  @spaces.GPU()
94
  def ocr_image_gradio(image, max_tokens=4096):
95
+ """Process image through Nanonets OCR model for Gradio interface (non-streaming)"""
96
  if image is None:
97
  return "Please upload an image."
98
 
 
145
  💻 GitHub Repository
146
  </a>
147
  </div>
 
 
 
148
  </div>
149
  """)
150
 
 
165
  )
166
  extract_btn = gr.Button("Extract Text", variant="primary", size="lg")
167
 
 
 
 
 
 
 
 
168
  with gr.Column(scale=2):
169
  output_text = gr.Markdown(
170
+ label="Formatted model prediction",
171
  latex_delimiters=[
172
  {"left": "$$", "right": "$$", "display": True},
173
  {"left": "$", "right": "$", "display": False},
 
181
  show_copy_button=True,
182
  )
183
 
184
+ # Event handlers - Updated to use streaming
185
  extract_btn.click(
186
  fn=ocr_image_gradio_stream,
187
  inputs=[image_input, max_tokens_slider],
 
227
  """)
228
 
229
  if __name__ == "__main__":
230
+ demo.queue().launch()