Spaces:
Running
Running
File size: 4,097 Bytes
cc21f11 a8d7146 cc21f11 a8d7146 c4e9c8e cc21f11 3c1d21c c4e9c8e cc21f11 842626e a8d7146 cc21f11 a8d7146 cc21f11 a8d7146 cc21f11 196ae1d cc21f11 a8d7146 c4e9c8e cc21f11 c4e9c8e a8d7146 c4e9c8e 196ae1d a8d7146 196ae1d a8d7146 cc21f11 c4e9c8e cc21f11 795049f cc21f11 0303b9b cc21f11 0303b9b cc21f11 10e3f9b 2ca41ce 10e3f9b 85327a5 96bc60c 842626e 189b51b 842626e cc21f11 940be83 cc21f11 0303b9b 31ecf39 196ae1d 99e492b 3c1d21c cc21f11 3c1d21c 196ae1d cc21f11 245e92a 99e492b cc21f11 0303b9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import gradio as gr
import tempfile
from ocr_engine import extract_text_from_image
from pdf_reader import extract_text_chunks_from_pdf
from summarizer import summarize_long_text
from utils import chunk_text_by_tokens
def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
    """Extract text from the first available input and summarize it.

    Inputs are tried in priority order: ``pdf``, then ``image``, then
    ``manual_text``; the first one that is present wins.

    Args:
        pdf: Uploaded PDF handed to ``extract_text_chunks_from_pdf``, or ``None``.
        image: Image handed to ``extract_text_from_image``, or ``None``.
        manual_text: Text typed by the user. ``None`` and whitespace-only
            values are treated as "no text given".
        mode: Summarization mode forwarded to ``summarize_long_text``.
        model_name: Model identifier forwarded to ``summarize_long_text``.
        start_page: Start of the page range; converted with ``int()`` and
            passed as ``start=`` to the PDF extractor. Only used for PDFs.
        end_page: End of the page range; converted with ``int()`` and
            passed as ``end=`` to the PDF extractor. Only used for PDFs.
        lang_mode: Language mode forwarded to ``summarize_long_text``.
        is_table: Table flag forwarded to ``summarize_long_text``. It is
            rejected unless ``model_name`` is ``"anthropic/claude-3-haiku"``.

    Returns:
        A 3-tuple ``(text, summary, file_path)``. On success it holds the
        extracted text, the summary prefixed with an info header, and the
        path of a UTF-8 ``.txt`` file containing that summary. On failure it
        is ``(message, "", None)`` where ``message`` is a user-facing notice
        or the extractor's ``[ERROR]`` text.
    """
    if is_table and model_name != "anthropic/claude-3-haiku":
        return "Tablo içeriği için yalnızca Claude önerilir.", "", None

    if pdf is not None:
        text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
        # Report the chunk that actually carries the error marker; it is not
        # necessarily the first one.
        failed_chunk = next((chunk for chunk in text_chunks if "[ERROR]" in chunk), None)
        if failed_chunk is not None:
            return failed_chunk, "", None
    elif image is not None:
        text = extract_text_from_image(image)
        if "[ERROR]" in text:
            return text, "", None
        text_chunks = [text]
    elif manual_text and manual_text.strip():
        # ``manual_text`` may be None, so test truthiness before stripping.
        text_chunks = [manual_text]
    else:
        return "Lütfen bir giriş türü seçin.", "", None

    all_text = "\n\n".join(text_chunks)
    chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1300))

    # NOTE(review): the page range is reported even for image/manual input,
    # where it has no meaning — confirm whether that is intended.
    info_block = "\n".join(
        [
            f"Sayfa Aralığı: {start_page}–{end_page}",
            f"Model: {model_name}",
            f"Chunk Sayısı: {chunk_count}",
        ]
    )

    full_summary = summarize_long_text(all_text, mode, model_name, lang_mode, is_table)
    full_summary = f"{info_block}\n\n{full_summary}"

    # delete=False keeps the file on disk after closing so its path can be
    # returned; the context manager closes the handle even if write() fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as temp_file:
        temp_file.write(full_summary)

    return all_text, full_summary, temp_file.name
# Build the Gradio UI: three input widgets, a page-range row for PDFs,
# mode/model/language selectors, and the outputs fed by ``process_input``.
# NOTE(review): the source arrived without indentation, so which components
# sit inside each ``gr.Row()`` is reconstructed — confirm against the
# intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## VizSum")

    with gr.Row():
        pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
        image_input = gr.Image(type="filepath", label="Görsel Yükle")
        manual_input = gr.Textbox(lines=5, label="Metni Manuel Gir")

    # The page-range row starts hidden and is toggled by the PDF input below.
    with gr.Row(visible=False) as page_controls:
        start_page = gr.Number(label="Başlangıç Sayfası", value=1, minimum=1, precision=0)
        end_page = gr.Number(label="Bitiş Sayfası", value=5, minimum=1, precision=0)

    def show_page_controls(pdf):
        """Show the page-range row only while a PDF is loaded.

        The change event also fires when the file is cleared (``pdf`` is
        ``None``); in that case the row is hidden again.
        """
        return gr.update(visible=pdf is not None)

    pdf_input.change(
        fn=show_page_controls,
        inputs=[pdf_input],
        outputs=[page_controls],
    )

    def enable_page_inputs(pdf):
        """Mark both page-number fields as interactive on any PDF change."""
        return gr.update(interactive=True), gr.update(interactive=True)

    pdf_input.change(
        fn=enable_page_inputs,
        inputs=[pdf_input],
        outputs=[start_page, end_page],
    )

    mode_selector = gr.Dropdown(
        choices=[
            "Teknik Özet",
            "Sade Anlatım",
            "Eleştir ve Değerlendir",
            "Başlık Çıkar",
            "Not Formatı",
            "Karma Özet",
            "Chat Özeti (Yazışma/Not)",
        ],
        label="Özetleme Modu",
        value="Teknik Özet",
    )

    model_selector = gr.Dropdown(
        choices=[
            "anthropic/claude-3-haiku",
            "openai/gpt-3.5-turbo",
            "mistralai/mistral-7b-instruct",
        ],
        label="Dil Modeli",
        value="anthropic/claude-3-haiku",
    )

    with gr.Row():
        submit_btn = gr.Button("Özetle")

    with gr.Row():
        text_output = gr.Textbox(label="Giriş Metni")
        summary_output = gr.Textbox(label="AI Özeti", lines=10, show_copy_button=True)
        summary_file = gr.File(label="Özeti İndir", interactive=True)

    lang_mode = gr.Radio(
        choices=["Otomatik", "Sadece Türkçe", "Sadece İngilizce", "Türkçeye Çevir", "İngilizceye Çevir"],
        label="Dil Algılama / Çeviri Modu",
        value="Otomatik",
    )

    is_table = gr.Checkbox(label="Tablo içeriyor (Claude tablo gibi özetlesin)", value=False)

    # The order of ``inputs`` must match the positional parameters of
    # ``process_input``; the three outputs receive its 3-tuple return value.
    submit_btn.click(
        fn=process_input,
        inputs=[pdf_input, image_input, manual_input, mode_selector, model_selector, start_page, end_page, lang_mode, is_table],
        outputs=[text_output, summary_output, summary_file],
    )
# Launch the app only when this file is executed as a script, not on import.
# share=True is passed to launch(); per Gradio's documentation this requests
# a public share link.
if __name__ == "__main__":
    demo.launch(share=True)
|