Spaces:

Zatimm
/

epub1

Sleeping

App Files Files Community

epub1 / app.py

Zatimm

Update app.py

e0fc2cc verified about 2 months ago

raw

history blame contribute delete

6.37 kB

	# app.py

	import gradio as gr
	import torch
	from transformers import NougatProcessor, VisionEncoderDecoderModel
	from PIL import Image
	import fitz # PyMuPDF
	from typing import List, Iterator, Tuple
	import os
	import requests
	from io import BytesIO

	# --- Model and processor loading ---
	# Loaded once at import time and kept as module-level globals so every
	# request reuses the same objects instead of reloading them.
	MODEL_ID = "facebook/nougat-base"

	# Chosen before the try block so that `device` is always bound, even when
	# loading fails (the except branch only resets `model` and `processor`).
	device = "cuda" if torch.cuda.is_available() else "cpu"

	try:
	    processor = NougatProcessor.from_pretrained(MODEL_ID)
	    model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)

	    # Move the model to the selected device (GPU if available, else CPU).
	    model.to(device)
	    print(f"Model '{MODEL_ID}' başarıyla yüklendi ve '{device.upper()}' cihazına taşındı.")
	    MODEL_LOADED = True
	except Exception as e:
	    # Best-effort startup: report the failure and leave the module in a
	    # state where MODEL_LOADED tells the handlers not to touch the model.
	    print(f"Model yüklenirken bir hata oluştu: {e}")
	    MODEL_LOADED = False
	    model = None
	    processor = None

	# --- Core processing functions ---

	def process_single_image(image: Image.Image) -> str:
	    """Run the Nougat model on one PIL image and return the decoded text.

	    Args:
	        image: Page image to convert; ``None`` is rejected.

	    Returns:
	        The post-processed generation for the image, or an error message
	        string when the model is not loaded, the image is ``None``, or
	        any step of the pipeline raises.
	    """
	    if not (MODEL_LOADED and image is not None):
	        return "Model yüklenemedi veya geçersiz görüntü."

	    try:
	        encoded = processor(images=image, return_tensors="pt")
	        generated_ids = model.generate(
	            encoded.pixel_values.to(device),
	            min_length=1,
	            max_new_tokens=4096,
	            # Forbid the tokenizer's unknown token in the generated output.
	            bad_words_ids=[[processor.tokenizer.unk_token_id]],
	        )
	        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
	        # Only one image was submitted, so only the first decoded entry is used.
	        return processor.post_process_generation(decoded[0], fix_markdown=False)
	    except Exception as e:
	        # Log to the console and hand the same failure back to the caller.
	        print(f"Görüntü işleme hatası: {e}")
	        return f"Görüntü işlenirken bir hata oluştu: {e}"

	def process_pdf_file(pdf_file) -> Iterator[Tuple[str, str]]:
	    """Convert an uploaded PDF to Markdown page by page, streaming progress.

	    Args:
	        pdf_file: The uploaded file, either an object exposing its path as
	            ``.name`` or a plain path string. ``None`` means nothing was
	            uploaded.

	    Yields:
	        ``(status, markdown)`` tuples. ``status`` is a progress or error
	        message; ``markdown`` is the content accumulated so far (an empty
	        string for error and start-up messages).
	    """
	    if not MODEL_LOADED:
	        yield "Hata: Model yüklenemedi.", ""
	        return
	    if pdf_file is None:
	        yield "Hata: PDF dosyası yüklenmedi.", ""
	        return

	    page_separator = "\n\n---\n\n"
	    doc = None
	    try:
	        yield "PDF dosyası açılıyor...", ""
	        # Accept both a file wrapper exposing `.name` and a bare path string.
	        pdf_path = getattr(pdf_file, "name", pdf_file)
	        doc = fitz.open(pdf_path)
	        total_pages = len(doc)

	        if total_pages == 0:
	            yield "Hata: PDF dosyasında işlenecek sayfa bulunamadı.", ""
	            return

	        full_markdown_content = []
	        for page_num in range(total_pages):
	            status_message = f"Sayfa {page_num + 1} / {total_pages} işleniyor..."
	            # Before each page, publish the status and everything converted so far.
	            yield status_message, page_separator.join(full_markdown_content)

	            # Rasterize the page and wrap the raw samples in a PIL image.
	            page = doc.load_page(page_num)
	            pix = page.get_pixmap(dpi=150)
	            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples).convert("RGB")

	            page_markdown = process_single_image(image)
	            full_markdown_content.append(f"## Sayfa {page_num + 1}\n\n{page_markdown}")

	        # Final result
	        final_output = page_separator.join(full_markdown_content)
	        yield "İşlem tamamlandı!", final_output

	    except Exception as e:
	        error_msg = f"PDF işlenirken bir hata oluştu: {e}"
	        print(error_msg)  # Write the error to the server log
	        yield error_msg, ""  # Show the error in the UI
	    finally:
	        # Compare against None explicitly: `doc` supports len(), so a
	        # zero-page document could evaluate as falsy under `if doc:` and
	        # would then never be closed.
	        if doc is not None:
	            doc.close()

	# --- Gradio interface ---
	# Builds the `demo` Blocks app: an intro text, one tab for PDF conversion,
	# one tab for single-image conversion, the click handlers that connect the
	# buttons to the processing functions, and an optional examples section.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown(
	        """
	# 📄 Facebook Nougat Belge Dönüştürücü
	Bu arayüz, Meta AI tarafından geliştirilen facebook/nougat-base modelini kullanarak belgelerinizi (PDF veya resim) yapılandırılmış Markdown metnine dönüştürmenizi sağlar.
	Lütfen bir PDF dosyası veya bir belge sayfası görüntüsü yükleyin.
	"""
	    )

	    with gr.Tabs():
	        # PDF processing tab
	        with gr.TabItem("PDF Dosyasını İşle"):
	            pdf_input = gr.File(label="PDF Dosyası Yükle", file_types=[".pdf"])
	            pdf_process_button = gr.Button("PDF'i Dönüştür", variant="primary")
	            # Read-only textbox that displays the status messages.
	            pdf_status = gr.Textbox(label="İşlem Durumu", value="İşlem bekleniyor...", interactive=False)
	            pdf_output = gr.Markdown(label="Dönüştürülen Metin (Markdown)")

	        # Single-image processing tab
	        with gr.TabItem("Tek Görüntü İşle"):
	            image_input = gr.Image(label="Belge Sayfası Görüntüsü Yükle", type="pil")
	            image_process_button = gr.Button("Görüntüyü Dönüştür", variant="primary")
	            image_output = gr.Markdown(label="Dönüştürülen Metin (Markdown)")

	    # PDF button: process_pdf_file yields (status, markdown) pairs.
	    pdf_process_button.click(
	        fn=process_pdf_file,
	        inputs=[pdf_input],
	        # Two outputs: the status textbox and the Markdown result area.
	        outputs=[pdf_status, pdf_output],
	        api_name="process_pdf"
	    )

	    # Image button: a single call whose return value fills the Markdown area.
	    image_process_button.click(
	        fn=process_single_image,
	        inputs=[image_input],
	        outputs=[image_output],
	        api_name="process_image"
	    )

	    gr.Markdown("### Örnek Kullanım")
	    example_image_path = "nougat_paper_example.png"
	    # The examples widget is only added when the sample image already exists.
	    # NOTE(review): the sample is downloaded in the `__main__` section further
	    # down, which runs after this check; on a fresh checkout the examples
	    # presumably stay hidden until the next start. Confirm whether intended.
	    if os.path.exists(example_image_path):
	        gr.Examples(
	            examples=[example_image_path],
	            inputs=image_input,
	            outputs=image_output,
	            fn=process_single_image,
	            cache_examples=True,
	            label="Örnek Görüntü"
	        )

	if __name__ == "__main__":
	    # Fetch the sample page image on first run. This is best-effort: any
	    # failure is reported and the app still launches.
	    if not os.path.exists("nougat_paper_example.png"):
	        try:
	            url = "https://huggingface.co/datasets/hf-internal-testing/fixtures_docvqa/resolve/main/nougat_paper.png"
	            # Bound the wait so an unresponsive server cannot block startup
	            # forever, and reject HTTP error responses before handing the
	            # body to PIL.
	            response = requests.get(url, timeout=30)
	            response.raise_for_status()
	            img = Image.open(BytesIO(response.content))
	            img.save("nougat_paper_example.png")
	            print("Örnek resim 'nougat_paper_example.png' indirildi.")
	        except Exception as e:
	            print(f"Örnek resim indirilemedi: {e}")

	    demo.launch(debug=True)