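"""Gradio demo for NuMarkdown-8B-Thinking.

Sends an uploaded document image to a vLLM server exposing an OpenAI-compatible
API at http://localhost:8000 (started separately) and displays the model's
thinking trace alongside the generated Markdown.
"""
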
import gradio as gr
import requests
import base64
from PIL import Image
from io import BytesIO


def encode_image_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a JPEG data URL for OpenAI-style image_url content."""
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"
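
# The chat payload below embeds the image inline; a typical content entry
# looks like this (base64 truncated for illustration):
#   {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/..."}}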


def query_vllm_api(image, temperature, max_tokens=12_000):
    messages = []
    if image is not None:
        # Downsize large images to avoid huge uploads.
        max_size = 1024
        if max(image.size) > max_size:
            ratio = max_size / max(image.size)
            new_size = tuple(int(dim * ratio) for dim in image.size)
            image = image.resize(new_size, Image.Resampling.LANCZOS)

        image_b64 = encode_image_to_base64(image)
        messages.append({
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_b64}}
            ]
        })

    payload = {
        "model": "numind/NuMarkdown-8B-Thinking",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    try:
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            timeout=60,  # may need raising for long generations
        )
        response.raise_for_status()
        data = response.json()
        result = data["choices"][0]["message"]["content"]

        # The model wraps its reasoning in <think>...</think> and the final
        # Markdown in <answer>...</answer>; fall back gracefully if either
        # pair of tags is missing.
        if "<think>" in result and "</think>" in result:
            reasoning = result.split("<think>")[1].split("</think>")[0]
        else:
            reasoning = ""
        if "<answer>" in result and "</answer>" in result:
            answer = result.split("<answer>")[1].split("</answer>")[0]
        else:
            answer = result

        # The answer feeds both the raw-output textbox and the rendered
        # Markdown component.
        return reasoning, answer, answer
    except requests.exceptions.RequestException as e:
        # Report the failure to all three outputs so Gradio receives the
        # expected number of values.
        error = f"API request failed: {e}"
        return error, "", error
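
# The endpoint above assumes a vLLM OpenAI-compatible server is already
# serving the model locally; a minimal sketch (exact flags vary by vLLM
# version):
#
#   vllm serve numind/NuMarkdown-8B-Thinking --port 8000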


with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
    # Clean banner with centered content
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
        <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
        <div style="margin-top: 15px;">
            <a href="https://nuextract.ai/" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://discord.gg/3tsEtJNCDe" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://github.com/numindai/NuMarkdown" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🔗 GitHub</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
        </div>
    </div>
    <p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
    <p>NOTE: In this Space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
    """)
    with gr.Row():
        with gr.Column():
            temperature = gr.Slider(0.1, 1.5, value=0.6, step=0.1, label="Temperature")
            img_in = gr.Image(type="pil", label="Upload Image")
            btn = gr.Button("Generate Response")
        with gr.Column():
            thinking = gr.Textbox(label="Thinking Trace", lines=10)
            raw_answer = gr.Textbox(label="Raw Output", lines=5)
            output = gr.Markdown(label="Response")

    btn.click(
        query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )


if __name__ == "__main__":
    print("Python script started...")
    # share=True also creates a public Gradio link when run outside a Space.
    demo.launch(share=True)