Spaces:
Running
Running
import gradio as gr | |
import tempfile | |
from ocr_engine import extract_text_from_image | |
from pdf_reader import extract_text_chunks_from_pdf | |
from summarizer import summarize_text | |
def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
    """Extract text from the first available input and summarize it chunk by chunk.

    Inputs are tried in priority order: ``pdf``, then ``image``, then
    ``manual_text``. Only the first one that is present is used.

    Args:
        pdf: Uploaded PDF, forwarded unchanged to
            ``extract_text_chunks_from_pdf``; ``None`` when absent.
        image: Image forwarded unchanged to ``extract_text_from_image``;
            ``None`` when absent.
        manual_text: Text typed by the user; ``None`` or whitespace-only
            counts as absent.
        mode: Summarization mode, forwarded to ``summarize_text``.
        model_name: Model identifier, forwarded to ``summarize_text``.
        start_page: First PDF page to read; converted with ``int``.
        end_page: Last PDF page to read; converted with ``int``.
        lang_mode: Language/translation mode, forwarded to ``summarize_text``.
        is_table: Whether the content should be treated as a table. Only
            accepted together with the ``anthropic/claude-3-haiku`` model.

    Returns:
        A 3-tuple ``(text, summary, path)``. On success ``text`` is the
        extracted chunks joined by blank lines, ``summary`` is the per-chunk
        summaries joined by blank lines, and ``path`` is the name of a UTF-8
        ``.txt`` temporary file holding the summary. On failure the first
        element is a user-facing message (or the extracted text carrying the
        ``[ERROR]`` marker) and the other two are ``""`` and ``None``.
    """
    if is_table and model_name != "anthropic/claude-3-haiku":
        return "Tablo içeriği için yalnızca Claude önerilir.", "", None

    if pdf is not None:
        # A cleared number field arrives as None; report it instead of crashing.
        try:
            first_page, last_page = int(start_page), int(end_page)
        except (TypeError, ValueError, OverflowError):
            return "Lütfen geçerli bir sayfa aralığı girin.", "", None
        text_chunks = extract_text_chunks_from_pdf(pdf, start=first_page, end=last_page)
        # Surface the chunk that actually carries the error marker, which is
        # not necessarily the first chunk.
        failed_chunk = next((chunk for chunk in text_chunks if "[ERROR]" in chunk), None)
        if failed_chunk is not None:
            return failed_chunk, "", None
    elif image is not None:
        text = extract_text_from_image(image)
        if "[ERROR]" in text:
            return text, "", None
        text_chunks = [text]
    elif manual_text and manual_text.strip():
        text_chunks = [manual_text]
    else:
        return "Lütfen bir giriş türü seçin.", "", None

    all_text = "\n\n".join(text_chunks)
    full_summary = "\n\n".join(
        summarize_text(chunk, mode, model_name, lang_mode, is_table)
        for chunk in text_chunks
    )

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays valid for the caller.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as temp_file:
        temp_file.write(full_summary)

    return all_text, full_summary, temp_file.name
# Gradio UI: three alternative inputs (PDF, image, manual text), the
# summarization options, and the outputs produced by process_input.
# NOTE(review): the nesting of components inside rows was reconstructed from a
# source whose indentation was lost — confirm the layout grouping.
with gr.Blocks() as demo:
    gr.Markdown("## VizSum")

    with gr.Row():
        pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
        image_input = gr.Image(type="filepath", label="Görsel Yükle")
        manual_input = gr.Textbox(lines=5, label="Metni Manuel Gir")

    # The page range only applies to PDFs, so the row starts hidden.
    with gr.Row(visible=False) as page_controls:
        start_page = gr.Number(label="Başlangıç Sayfası", value=1, minimum=1, precision=0)
        end_page = gr.Number(label="Bitiş Sayfası", value=5, minimum=1, precision=0)

    def show_page_controls(pdf):
        """Show the page-range row while a PDF is loaded and hide it otherwise."""
        # The change event also fires when the file is cleared (pdf is None).
        return gr.update(visible=pdf is not None)

    pdf_input.change(
        fn=show_page_controls,
        inputs=[pdf_input],
        outputs=[page_controls],
    )

    def enable_page_inputs(pdf):
        """Mark both page-number fields as interactive."""
        return gr.update(interactive=True), gr.update(interactive=True)

    pdf_input.change(
        fn=enable_page_inputs,
        inputs=[pdf_input],
        outputs=[start_page, end_page],
    )

    mode_selector = gr.Dropdown(
        choices=[
            "Teknik Özet",
            "Sade Anlatım",
            "Eleştir ve Değerlendir",
            "Başlık Çıkar",
            "Not Formatı",
            "Karma Özet",
            "Chat Özeti (Yazışma/Not)",
        ],
        label="Özetleme Modu",
        value="Teknik Özet",
    )

    model_selector = gr.Dropdown(
        choices=[
            "anthropic/claude-3-haiku",
            "openai/gpt-3.5-turbo",
            "mistralai/mistral-7b-instruct",
        ],
        label="Dil Modeli",
        value="anthropic/claude-3-haiku",
    )

    with gr.Row():
        submit_btn = gr.Button("Özetle")

    with gr.Row():
        text_output = gr.Textbox(label="Giriş Metni")
        summary_output = gr.Textbox(label="AI Özeti", lines=10, show_copy_button=True)
        summary_file = gr.File(label="Özeti İndir", interactive=True)

    lang_mode = gr.Radio(
        choices=["Otomatik", "Sadece Türkçe", "Sadece İngilizce", "Türkçeye Çevir", "İngilizceye Çevir"],
        label="Dil Algılama / Çeviri Modu",
        value="Otomatik",
    )

    is_table = gr.Checkbox(label="Tablo içeriyor (Claude tablo gibi özetlesin)", value=False)

    # The order of inputs must match the positional parameters of process_input.
    submit_btn.click(
        fn=process_input,
        inputs=[
            pdf_input,
            image_input,
            manual_input,
            mode_selector,
            model_selector,
            start_page,
            end_page,
            lang_mode,
            is_table,
        ],
        outputs=[text_output, summary_output, summary_file],
    )

if __name__ == "__main__":
    demo.launch(share=True)