import os

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration
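
# NOTE (assumption): Llama 3.2 Vision support landed in transformers 4.45, so
# a release at least that new is assumed here.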

# The Llama 3.2 Vision checkpoints are gated on the Hugging Face Hub, so the
# token read from HF_AUTH_TOKEN must belong to an account with access.
hf_token = os.getenv("HF_AUTH_TOKEN")
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Llama 3.2 Vision is an image-text-to-text model, so it loads through
# MllamaForConditionalGeneration rather than AutoModelForCausalLM; the
# deprecated use_auth_token argument is replaced by token.
processor = AutoProcessor.from_pretrained(model_name, token=hf_token)
model = MllamaForConditionalGeneration.from_pretrained(
    model_name, token=hf_token, torch_dtype=torch.float16
).to(device)
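
# Tip (assumes the accelerate package is installed): device_map="auto" in
# from_pretrained can replace the manual .to(device) and shard the weights
# across the available devices.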


def process_image(image, prompt="Extract all the text in this image."):
    # The processor's chat template inserts the <|image|> placeholder and
    # generation header the model expects; a bare task token like "<ocr>"
    # is not part of this model's prompt format.
    messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt}]}]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(images=image, text=input_text, add_special_tokens=False, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=1024)
    # Decode only the newly generated tokens so the prompt is not echoed back.
    generated_text = processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:], skip_special_tokens=True)[0]
    return generated_text
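
# Standalone usage (hypothetical file name):
#   print(process_image(Image.open("receipt.png")))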


iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(value="Extract all the text in this image.", label="Prompt"),
    ],
    outputs="text",
    title="OCR with Llama-3.2-11B-Vision-Instruct",
    description="Upload an image and enter a prompt to extract the text it contains.",
)
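
# launch() serves the app on a local URL; passing share=True would also create
# a temporary public Gradio link.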
iface.launch()