Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Runtime error

App Files Files Community

Thai-HandWriting-to-Text / app.py

Aekanun

first

43d09a2 10 months ago

raw

history blame

3.67 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
	from PIL import Image

	# Module-level handles shared by the loader and the inference callback.
	# Both stay None until load_model_and_processor() assigns them.
	model = processor = None

	def load_model_and_processor():
	    """Load the processor and the 4-bit quantized model into module globals.

	    The processor is fetched from the base model repository and the model
	    from the fine-tuned repository. The processor is assigned first, so it
	    may already be set when loading the model fails.

	    Returns:
	        True when both objects were loaded; False when loading raised, in
	        which case the exception message is printed.
	    """
	    global model, processor

	    finetuned_repo = "Aekanun/thai-handwriting-llm"
	    base_repo = "meta-llama/Llama-3.2-11B-Vision-Instruct"

	    # 4-bit NF4 quantization with double quantization, computing in bfloat16.
	    quantization = BitsAndBytesConfig(
	        load_in_4bit=True,
	        bnb_4bit_use_double_quant=True,
	        bnb_4bit_quant_type="nf4",
	        bnb_4bit_compute_dtype=torch.bfloat16,
	    )
	    model_kwargs = {
	        "device_map": "auto",
	        "torch_dtype": torch.bfloat16,
	        "quantization_config": quantization,
	    }

	    try:
	        processor = AutoProcessor.from_pretrained(base_repo)
	        model = AutoModelForVision2Seq.from_pretrained(
	            finetuned_repo, **model_kwargs
	        )
	    except Exception as e:
	        # Start-up boundary: report the failure and let the caller decide.
	        print(f"Error loading model: {str(e)}")
	        return False
	    return True

	def process_handwriting(image):
	    """Transcribe Thai handwriting in ``image`` with the loaded vision model.

	    Args:
	        image: A PIL image, or an array accepted by ``Image.fromarray``.
	            ``None`` means nothing was uploaded.

	    Returns:
	        The newly generated text with surrounding whitespace removed; a Thai
	        message asking for an upload when ``image`` is ``None``; or a Thai
	        error message containing the exception text when anything fails.
	    """
	    if image is None:
	        return "กรุณาอัพโหลดรูปภาพ"

	    try:
	        # Ensure image is in PIL format
	        if not isinstance(image, Image.Image):
	            image = Image.fromarray(image)

	        # NOTE: the second line of this literal is kept exactly as it appears
	        # in the file; its leading whitespace is part of the prompt text.
	        prompt = """Transcribe the Thai handwritten text from the provided image.
	Only return the transcription in Thai language."""
	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    {"type": "image", "image": image},
	                ],
	            }
	        ]

	        # add_generation_prompt appends the assistant header so the model
	        # starts a reply instead of continuing the user turn.
	        text = processor.apply_chat_template(
	            messages, tokenize=False, add_generation_prompt=True
	        )
	        # Following the Llama 3.2 Vision model card: the templated text
	        # already carries the begin-of-text token, so the tokenizer is told
	        # not to add special tokens a second time.
	        inputs = processor(
	            text=text,
	            images=image,
	            add_special_tokens=False,
	            return_tensors="pt",
	        )
	        inputs = {k: v.to(model.device) for k, v in inputs.items()}
	        prompt_length = inputs["input_ids"].shape[-1]

	        # Greedy decoding, capped at 256 new tokens; no gradients needed.
	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_new_tokens=256,
	                do_sample=False,
	                pad_token_id=processor.tokenizer.pad_token_id,
	            )

	        # generate() returns prompt + continuation; decode only the new
	        # tokens so the instruction text is not echoed back to the user.
	        new_tokens = outputs[0][prompt_length:]
	        return processor.decode(new_tokens, skip_special_tokens=True).strip()

	    except Exception as e:
	        # UI boundary: show the failure in the output box instead of crashing.
	        return f"เกิดข้อผิดพลาด: {str(e)}"

	# Load the model once at start-up; the UI is only built when that succeeds.
	print("กำลังโหลดโมเดล...")
	model_loaded = load_model_and_processor()

	if not model_loaded:
	    print("ไม่สามารถโหลดโมเดลได้ กรุณาตรวจสอบ log")
	else:
	    # One image input mapped to one text output, with two example images.
	    demo = gr.Interface(
	        fn=process_handwriting,
	        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
	        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
	        title="Thai Handwriting to Text ด้วย LLaMA Vision",
	        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ โดยใช้โมเดล LLaMA Vision ที่ fine-tune มาสำหรับภาษาไทย",
	        examples=[["example1.jpg"], ["example2.jpg"]],
	    )

	    # Start the server only when this file is executed as a script.
	    if __name__ == "__main__":
	        demo.launch(share=True)