omnivlm-dpo-demo

Running

App Files Files Community

omnivlm-dpo-demo / app.py

PerryCheng614

initial check-in gradio vlm UI

76578bc 12 months ago

raw

history blame

2.77 kB

	import gradio as gr
	import websockets
	import asyncio
	import json
	import base64
	from PIL import Image
	import io

	async def process_image_stream(image_path, prompt, max_tokens=512):
	    """Stream a model response about an image over a WebSocket.

	    The image is opened with PIL, converted to RGB, re-encoded as JPEG and
	    sent as a base64 data URL together with the prompt in a single JSON
	    message. The server's replies are then consumed one message at a time.

	    Args:
	        image_path: Path of the image file to open. A falsy value (``None``
	            or ``""``) produces a prompt to upload an image and nothing else.
	        prompt: Text sent to the server under the ``"prompt"`` key.
	        max_tokens: Value sent to the server under the ``"max_tokens"`` key.

	    Yields:
	        str: The accumulated response text after each ``"generating"``
	        message, or a single human-readable error string. Failures are
	        yielded as text rather than raised.
	    """
	    if not image_path:
	        yield "Please upload an image first."
	        return

	    # Encode the image before opening the socket so that an unreadable or
	    # missing file is reported as an image problem, not a connection failure.
	    try:
	        with Image.open(image_path) as img:
	            buffer = io.BytesIO()
	            img.convert('RGB').save(buffer, format="JPEG")
	    except Exception as e:  # UI boundary: report the failure as output text
	        yield f"Error reading image: {e}"
	        return
	    base64_image = base64.b64encode(buffer.getvalue()).decode('utf-8')

	    try:
	        async with websockets.connect(
	            'wss://nexa-omni.nexa4ai.com/ws/process-image/'
	        ) as websocket:
	            # Send image data and parameters as one JSON message.
	            await websocket.send(json.dumps({
	                "image": f"data:image/jpeg;base64,{base64_image}",
	                "prompt": prompt,
	                "task": "instruct",  # Fixed to instruct
	                "max_tokens": max_tokens,
	            }))

	            response = ""
	            async for message in websocket:
	                # Only the parse is guarded; frames that are not JSON are
	                # skipped.
	                try:
	                    data = json.loads(message)
	                except json.JSONDecodeError:
	                    continue
	                # Valid JSON that is not an object carries no status; skip it
	                # instead of failing the whole stream.
	                if not isinstance(data, dict):
	                    continue

	                status = data.get("status")
	                if status == "generating":
	                    response += data.get("token", "")
	                    yield response
	                elif status == "complete":
	                    break
	                elif status == "error":
	                    yield f"Error: {data.get('error', 'unknown error')}"
	                    break
	    except Exception as e:  # UI boundary: report the failure as output text
	        yield f"Error connecting to server: {e}"

	# Build the Gradio UI. The three input components map, in order, onto the
	# (image_path, prompt, max_tokens) parameters of process_image_stream.
	image_input = gr.Image(type="filepath", label="Upload Image")
	question_input = gr.Textbox(
	    label="Question",
	    placeholder="Ask a question about the image...",
	    value="Describe this image",
	)
	max_tokens_input = gr.Slider(
	    minimum=50, maximum=200, value=200, step=1, label="Max Tokens"
	)
	response_output = gr.Textbox(label="Response", interactive=False)

	# NOTE: the line breaks and leading whitespace inside the description
	# literal are part of the string and are kept exactly as they were.
	demo = gr.Interface(
	    fn=process_image_stream,
	    inputs=[image_input, question_input, max_tokens_input],
	    outputs=response_output,
	    title="Nexa Omni Vision",
	    description="""
	Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
	""",
	    examples=[
	        ["example_images/example_1.jpg", "Describe this image", 128],
	    ],
	)

	if __name__ == "__main__":
	    # Call queue() first, then launch on host 0.0.0.0 (all interfaces),
	    # port 7860.
	    queued_demo = demo.queue()
	    queued_demo.launch(server_name="0.0.0.0", server_port=7860)