Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -345,10 +345,12 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
345 |
],
|
346 |
label="Select Model",
|
347 |
value="Camel-Doc-OCR-080125(v2)")
|
348 |
-
|
349 |
-
|
350 |
gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Outpost/discussions)")
|
351 |
-
|
|
|
|
|
|
|
352 |
# Define the submit button actions
|
353 |
image_submit.click(fn=generate_image,
|
354 |
inputs=[
|
|
|
345 |
],
|
346 |
label="Select Model",
|
347 |
value="Camel-Doc-OCR-080125(v2)")
|
348 |
+
|
|
|
349 |
gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Outpost/discussions)")
|
350 |
+
gr.Markdown("> Camel-Doc-OCR-080125 is a specialized vision-language model, fine-tuned from Qwen2.5-VL-7B-Instruct, and excels at document retrieval, content extraction, and analysis recognition for both structured and unstructured digital documents. OCRFlux-3B is a 3B-parameter vision-language model optimized for high-quality OCR on PDFs and images, excelling in converting documents to clean Markdown text and supporting features like cross-page table/paragraph merging.")
|
351 |
+
gr.Markdown("> Both ViGoRL-MCTS-SFT-3b-Spatial and 7b-Spatial are vision-language models that use multi-turn visually grounded reinforcement learning for precise spatial reasoning and visual grounding, with the 3b and 7b variants differing mainly in their architectural size for fine-grained visual tasks.")
|
352 |
+
gr.Markdown("> Owlet-safety-3b-1 is tailored for multi-label safety event detection in video, such as identifying fire, smoke, fall, and theft, and is useful for surveillance and incident detection scenarios. MonkeyOCR-pro-1.2B is a lightweight yet powerful model for document parsing using a structure-recognition-relation triplet method, delivering state-of-the-art OCR results.")
|
353 |
+
|
354 |
# Define the submit button actions
|
355 |
image_submit.click(fn=generate_image,
|
356 |
inputs=[
|