Spaces:
Running
on
L40S
Running
on
L40S
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
|
|
21 |
try:
|
22 |
messages = []
|
23 |
# Optional: Resize image if needed (to avoid huge uploads)
|
24 |
-
max_size =
|
25 |
if max(image.size) > max_size:
|
26 |
ratio = max_size / max(image.size)
|
27 |
new_size = tuple(int(dim * ratio) for dim in image.size)
|
@@ -75,7 +75,12 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
|
|
75 |
|
76 |
print("=== DEBUG: Creating Gradio interface ===")
|
77 |
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
79 |
gr.HTML("""
|
80 |
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
|
81 |
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
|
@@ -90,20 +95,38 @@ with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
|
|
90 |
<a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
|
91 |
</div>
|
92 |
</div>
|
93 |
-
|
94 |
<p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
|
95 |
<p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
|
96 |
""")
|
97 |
|
98 |
with gr.Row():
|
99 |
-
with gr.Column():
|
100 |
temperature = gr.Slider(0.1, 1.5, value=0.6, step=0.1, label="Temperature")
|
|
|
101 |
img_in = gr.Image(type="pil", label="Upload Image")
|
102 |
-
|
103 |
-
with gr.Column():
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
btn.click(
|
109 |
query_vllm_api,
|
|
|
21 |
try:
|
22 |
messages = []
|
23 |
# Optional: Resize image if needed (to avoid huge uploads)
|
24 |
+
max_size = 2048
|
25 |
if max(image.size) > max_size:
|
26 |
ratio = max_size / max(image.size)
|
27 |
new_size = tuple(int(dim * ratio) for dim in image.size)
|
|
|
75 |
|
76 |
print("=== DEBUG: Creating Gradio interface ===")
|
77 |
|
78 |
+
|
79 |
+
with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft(), css="""
|
80 |
+
* {
|
81 |
+
font-family: 'Inter', 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif !important;
|
82 |
+
}
|
83 |
+
""") as demo:
|
84 |
gr.HTML("""
|
85 |
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
|
86 |
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
|
|
|
95 |
<a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
|
96 |
</div>
|
97 |
</div>
|
|
|
98 |
<p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
|
99 |
<p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
|
100 |
""")
|
101 |
|
102 |
with gr.Row():
|
103 |
+
with gr.Column(scale=2):
|
104 |
temperature = gr.Slider(0.1, 1.5, value=0.6, step=0.1, label="Temperature")
|
105 |
+
btn = gr.Button("Generate Response", variant="primary", size="lg")
|
106 |
img_in = gr.Image(type="pil", label="Upload Image")
|
107 |
+
|
108 |
+
with gr.Column(scale=2):
|
109 |
+
|
110 |
+
# Debug section - collapsible
|
111 |
+
with gr.Accordion("π Model Outputs", open=True):
|
112 |
+
with gr.Tabs():
|
113 |
+
with gr.TabItem("🧠 Thinking Trace"):
|
114 |
+
thinking = gr.Textbox(
|
115 |
+
lines=15,
|
116 |
+
max_lines=25,
|
117 |
+
show_label=False,
|
118 |
+
placeholder="The model's reasoning process will appear here..."
|
119 |
+
)
|
120 |
+
|
121 |
+
with gr.TabItem("π Raw Markdown"):
|
122 |
+
raw_answer = gr.Textbox(
|
123 |
+
lines=15,
|
124 |
+
max_lines=25,
|
125 |
+
show_label=False,
|
126 |
+
placeholder="The raw model output will appear here..."
|
127 |
+
)
|
128 |
+
with gr.TabItem("π Rendered Markdown"):
|
129 |
+
output = gr.Markdown(label="π Generated Markdown")
|
130 |
|
131 |
btn.click(
|
132 |
query_vllm_api,
|