liamcripwell committed on
Commit
841fa30
·
verified ·
1 Parent(s): d890ba0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -27
app.py CHANGED
@@ -4,6 +4,8 @@ import base64
4
  from PIL import Image
5
  from io import BytesIO
6
 
 
 
7
  def encode_image_to_base64(image: Image.Image) -> str:
8
  buffered = BytesIO()
9
  image.save(buffered, format="JPEG")
@@ -11,8 +13,13 @@ def encode_image_to_base64(image: Image.Image) -> str:
11
  return f"data:image/jpeg;base64,{img_str}"
12
 
13
  def query_vllm_api(image, temperature, max_tokens=12_000):
14
- messages = []
15
- if image is not None:
 
 
 
 
 
16
  # Optional: Resize image if needed (to avoid huge uploads)
17
  max_size = 1024
18
  if max(image.size) > max_size:
@@ -28,14 +35,14 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
28
  ]
29
  })
30
 
31
- payload = {
32
- "model": "numind/NuMarkdown-8B-Thinking",
33
- "messages": messages,
34
- "max_tokens": max_tokens,
35
- "temperature": temperature
36
- }
37
 
38
- try:
39
  response = requests.post(
40
  "http://localhost:8000/v1/chat/completions",
41
  json=payload,
@@ -45,32 +52,35 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
45
  data = response.json()
46
 
47
  result = data["choices"][0]["message"]["content"]
48
- reasoning = result.split("<think>")[1].split("</think>")[0]
49
- answer = result.split("<answer>")[1].split("</answer>")[0]
 
 
 
 
 
 
 
50
 
51
  return reasoning, answer, answer
 
52
  except requests.exceptions.RequestException as e:
53
- return f"API request failed: {e}"
 
 
 
 
 
 
 
 
54
 
55
  with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
56
- # Clean banner with centered content
57
  gr.HTML("""
58
  <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
59
  <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
60
  <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
61
- <div style="margin-top: 15px;">
62
- <a href="https://nuextract.ai/" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
63
- <span style="color: rgba(255,255,255,0.7);">|</span>
64
- <a href="https://discord.gg/3tsEtJNCDe" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
65
- <span style="color: rgba(255,255,255,0.7);">|</span>
66
- <a href="https://github.com/numindai/NuMarkdown" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🔗 GitHub</a>
67
- <span style="color: rgba(255,255,255,0.7);">|</span>
68
- <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
69
- </div>
70
  </div>
71
-
72
- <p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
73
- <p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
74
  """)
75
 
76
  with gr.Row():
@@ -89,6 +99,13 @@ with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
89
  outputs=[thinking, raw_answer, output],
90
  )
91
 
 
 
92
  if __name__ == "__main__":
93
- print("Python script started...")
94
- demo.launch(share=True)
 
 
 
 
 
 
4
  from PIL import Image
5
  from io import BytesIO
6
 
7
+ print("=== DEBUG: Starting app.py ===")
8
+
9
  def encode_image_to_base64(image: Image.Image) -> str:
10
  buffered = BytesIO()
11
  image.save(buffered, format="JPEG")
 
13
  return f"data:image/jpeg;base64,{img_str}"
14
 
15
  def query_vllm_api(image, temperature, max_tokens=12_000):
16
+ print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")
17
+
18
+ if image is None:
19
+ return "No image provided", "No image provided", "Please upload an image first."
20
+
21
+ try:
22
+ messages = []
23
  # Optional: Resize image if needed (to avoid huge uploads)
24
  max_size = 1024
25
  if max(image.size) > max_size:
 
35
  ]
36
  })
37
 
38
+ payload = {
39
+ "model": "numind/NuMarkdown-8B-Thinking",
40
+ "messages": messages,
41
+ "max_tokens": max_tokens,
42
+ "temperature": temperature
43
+ }
44
 
45
+ print("=== DEBUG: About to make vLLM API request ===")
46
  response = requests.post(
47
  "http://localhost:8000/v1/chat/completions",
48
  json=payload,
 
52
  data = response.json()
53
 
54
  result = data["choices"][0]["message"]["content"]
55
+
56
+ # Handle the thinking/answer parsing
57
+ try:
58
+ reasoning = result.split("<think>")[1].split("</think>")[0]
59
+ answer = result.split("<answer>")[1].split("</answer>")[0]
60
+ except IndexError:
61
+ # If no thinking tags, return the full result
62
+ reasoning = "No thinking trace found"
63
+ answer = result
64
 
65
  return reasoning, answer, answer
66
+
67
  except requests.exceptions.RequestException as e:
68
+ error_msg = f"API request failed: {e}"
69
+ print(f"=== DEBUG: Request error: {error_msg} ===")
70
+ return error_msg, error_msg, error_msg
71
+ except Exception as e:
72
+ error_msg = f"Unexpected error: {e}"
73
+ print(f"=== DEBUG: Unexpected error: {error_msg} ===")
74
+ return error_msg, error_msg, error_msg
75
+
76
+ print("=== DEBUG: Creating Gradio interface ===")
77
 
78
  with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
 
79
  gr.HTML("""
80
  <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
81
  <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
82
  <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
 
 
 
 
 
 
 
 
 
83
  </div>
 
 
 
84
  """)
85
 
86
  with gr.Row():
 
99
  outputs=[thinking, raw_answer, output],
100
  )
101
 
102
+ print("=== DEBUG: Gradio interface created ===")
103
+
104
  if __name__ == "__main__":
105
+ print("=== DEBUG: About to launch Gradio ===")
106
+ demo.launch(
107
+ server_name="0.0.0.0",
108
+ server_port=7860,
109
+ share=False
110
+ )
111
+ print("=== DEBUG: Gradio launched ===")