Spaces:

Vartex39
/

vizsum-pro

Running

File size: 4,097 Bytes

cc21f11
a8d7146
cc21f11
a8d7146
c4e9c8e
 
cc21f11
3c1d21c
 
 
c4e9c8e
cc21f11
842626e
a8d7146
 
cc21f11
 
a8d7146
 
 
cc21f11
a8d7146
cc21f11
196ae1d
cc21f11
a8d7146
c4e9c8e
cc21f11
c4e9c8e
 
 
 
 
a8d7146
c4e9c8e
 
196ae1d
 
a8d7146
196ae1d
 
a8d7146
cc21f11
c4e9c8e
cc21f11
795049f
cc21f11
 
0303b9b
 
cc21f11
0303b9b
cc21f11
10e3f9b
2ca41ce
 
10e3f9b
 
 
 
 
 
 
 
 
 
85327a5
 
 
 
 
 
 
 
 
 
96bc60c
842626e
 
 
 
 
 
 
189b51b
 
842626e
 
 
 
cc21f11
940be83
 
 
 
 
 
 
 
 
 
cc21f11
 
 
 
0303b9b
31ecf39
196ae1d
99e492b
 
 
 
 
 
3c1d21c
 
cc21f11
 
 
3c1d21c
196ae1d
cc21f11
245e92a
99e492b
 
cc21f11
0303b9b

import gradio as gr
import tempfile  
from ocr_engine import extract_text_from_image
from pdf_reader import extract_text_chunks_from_pdf
from summarizer import summarize_long_text
from utils import chunk_text_by_tokens 

def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
    """Extract text from the first available input, summarize it, and save the summary.

    Inputs are tried in priority order: ``pdf``, then ``image``, then
    ``manual_text``. Only the first one that is present is used.

    Args:
        pdf: Uploaded PDF as delivered by the UI file component, or None.
        image: Uploaded image as delivered by the UI image component, or None.
        manual_text: Text typed by the user; may be empty, blank, or None.
        mode: Summarization mode, forwarded to ``summarize_long_text``.
        model_name: Model identifier, forwarded to ``summarize_long_text``.
        start_page: Passed as ``start`` to the PDF reader after ``int()``.
        end_page: Passed as ``end`` to the PDF reader after ``int()``.
        lang_mode: Language/translation mode, forwarded to
            ``summarize_long_text``.
        is_table: Whether the content should be treated as a table; only
            accepted together with the Claude model.

    Returns:
        A 3-tuple ``(input_text, summary, summary_file_path)``. When
        validation or extraction fails, the first element carries the
        message and the remaining two are ``""`` and ``None``.
    """
    # Table summarization is only allowed with the Claude model.
    if is_table and model_name != "anthropic/claude-3-haiku":
        return "Tablo içeriği için yalnızca Claude önerilir.", "", None

    if pdf is not None:
        text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
        # Report the chunk that actually carries the error marker; it is not
        # necessarily the first chunk.
        error_chunk = next((chunk for chunk in text_chunks if "[ERROR]" in chunk), None)
        if error_chunk is not None:
            return error_chunk, "", None
    elif image is not None:
        text = extract_text_from_image(image)
        if "[ERROR]" in text:
            return text, "", None
        text_chunks = [text]
    elif manual_text and manual_text.strip():
        # The truthiness check also covers a None textbox value.
        text_chunks = [manual_text]
    else:
        return "Lütfen bir giriş türü seçin.", "", None

    all_text = "\n\n".join(text_chunks)
    # Only the number of chunks is reported; the summarizer receives the
    # full text.
    chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1300))

    # NOTE(review): the page range is printed for image and manual-text inputs
    # as well, where it has no meaning — confirm whether that is intended.
    info_block = f"""
     Sayfa Aralığı: {start_page}–{end_page}
     Model: {model_name}
     Chunk Sayısı: {chunk_count}
""".strip()

    full_summary = summarize_long_text(all_text, mode, model_name, lang_mode, is_table)
    full_summary = f"{info_block}\n\n{full_summary}"

    # delete=False keeps the file on disk after it is closed so the returned
    # path stays valid; the context manager closes the handle even if the
    # write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as temp_file:
        temp_file.write(full_summary)

    return all_text, full_summary, temp_file.name


# UI definition: three alternative inputs (PDF, image, manual text), the
# summarization options, and the outputs filled in by process_input.
with gr.Blocks() as demo:
    gr.Markdown("## VizSum")

    with gr.Row():
        pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
        image_input = gr.Image(type="filepath", label="Görsel Yükle")

    manual_input = gr.Textbox(lines=5, label="Metni Manuel Gir")

    # The page-range controls only apply to PDFs, so the row starts hidden.
    with gr.Row(visible=False) as page_controls:
        start_page = gr.Number(label="Başlangıç Sayfası", value=1, minimum=1, precision=0)
        end_page = gr.Number(label="Bitiş Sayfası", value=5, minimum=1, precision=0)

    def show_page_controls(pdf):
        """Show the page-range row while a PDF is loaded and hide it otherwise."""
        # The change event also fires when the file is cleared (pdf is None),
        # so visibility has to follow the current value instead of always
        # being switched on.
        return gr.update(visible=pdf is not None)

    pdf_input.change(
        fn=show_page_controls,
        inputs=[pdf_input],
        outputs=[page_controls]
    )

    def enable_page_inputs(pdf):
        """Mark both page-number fields as interactive whenever the PDF input changes."""
        return gr.update(interactive=True), gr.update(interactive=True)

    pdf_input.change(
        fn=enable_page_inputs,
        inputs=[pdf_input],
        outputs=[start_page, end_page]
    )

    mode_selector = gr.Dropdown(
        choices=[
            "Teknik Özet",
            "Sade Anlatım",
            "Eleştir ve Değerlendir",
            "Başlık Çıkar",
            "Not Formatı",
            "Karma Özet",
            "Chat Özeti (Yazışma/Not)"
        ],
        label="Özetleme Modu",
        value="Teknik Özet"
    )

    model_selector = gr.Dropdown(
        choices=[
            "anthropic/claude-3-haiku",
            "openai/gpt-3.5-turbo",
            "mistralai/mistral-7b-instruct"
        ],
        label="Dil Modeli",
        value="anthropic/claude-3-haiku"
    )

    with gr.Row():
        submit_btn = gr.Button("Özetle")

    with gr.Row():
        text_output = gr.Textbox(label="Giriş Metni")
        summary_output = gr.Textbox(label="AI Özeti", lines=10, show_copy_button=True)
        summary_file = gr.File(label="Özeti İndir", interactive=True)

    lang_mode = gr.Radio(
        choices=["Otomatik", "Sadece Türkçe", "Sadece İngilizce", "Türkçeye Çevir", "İngilizceye Çevir"],
        label="Dil Algılama / Çeviri Modu",
        value="Otomatik"
    )

    is_table = gr.Checkbox(label="Tablo içeriyor (Claude tablo gibi özetlesin)", value=False)

    # The order of `inputs` must match the parameter order of process_input,
    # and `outputs` must match its 3-tuple return value.
    submit_btn.click(
        fn=process_input,
        inputs=[pdf_input, image_input, manual_input, mode_selector, model_selector, start_page, end_page, lang_mode, is_table],
        outputs=[text_output, summary_output, summary_file]
    )



if __name__ == "__main__":
    # Start the app only when this file is run as a script; share=True is
    # forwarded to Gradio's launch call unchanged.
    launch_options = {"share": True}
    demo.launch(**launch_options)