import gradio as gr
import tempfile
import os
from ocr_engine import extract_text_from_image
from pdf_reader import extract_text_chunks_from_pdf
from summarizer import summarize_long_text, generate_answer
from utils import chunk_text_by_tokens
from core.rag.rag_engine import SimpleRAG
from docx import Document
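# Module-level state shared across Gradio callbacks:
# chat_history keeps (question, answer) pairs, rag_engine holds the lazily built RAG index.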
chat_history = []
rag_engine = None
def load_uploaded_docs(files):
    """Read each uploaded file, extract its text and build the RAG index."""
    global rag_engine
    if not files:
        return "Dosya yüklenmedi."
    rag_engine = SimpleRAG()
    docs = []
    for file in files:
        ext = os.path.splitext(file.name)[-1].lower()
        try:
            if ext == ".txt":
                with open(file.name, "r", encoding="utf-8") as f:
                    content = f.read()
            elif ext == ".pdf":
                chunks = extract_text_chunks_from_pdf(file.name)
                content = "\n".join(chunks)
            elif ext == ".docx":
                doc = Document(file.name)
                content = "\n".join([p.text for p in doc.paragraphs])
            elif ext in [".jpg", ".jpeg", ".png"]:
                content = extract_text_from_image(file.name)
            else:
                content = ""
            if content.strip():
                docs.append((os.path.basename(file.name), content))
        except Exception as e:
            print("Dosya okuma hatası:", e)
    if not docs:
        return "Dosyalar boş veya okunamadı."
    rag_engine.docs = docs
    rag_engine.build_index()
    return f"{len(docs)} dosya başarıyla yüklendi."
def extract_input_text(pdf, image, manual_text, start_page, end_page):
    if pdf:
        text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
        if any("[ERROR]" in chunk for chunk in text_chunks):
            # Surface the error chunk itself rather than blindly returning the first chunk
            return next(c for c in text_chunks if "[ERROR]" in c), None, None
        all_text = "\n\n".join(text_chunks)
        chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1000))
        info_block = f"Sayfa Aralığı: {start_page}–{end_page}\nChunk Sayısı: {chunk_count}"
        return all_text, info_block, "pdf"
    elif image:
        text = extract_text_from_image(image)
        if "[ERROR]" in text:
            return text, None, None
        return text, None, "image"
    elif manual_text and manual_text.strip():
        return manual_text, None, "manual"
    else:
        return "Lütfen bir giriş türü seçin.", None, None
def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
    # Table-style content is only supported with the Claude model
    if is_table and model_name != "anthropic/claude-3-haiku":
        return "Tablo içeriği için yalnızca Claude önerilir.", "", None
    all_text, info_block, input_type = extract_input_text(pdf, image, manual_text, start_page, end_page)
    if input_type is None:
        return all_text, "", None
    full_summary = summarize_long_text(all_text, mode, model_name, lang_mode, is_table)
    if info_block:
        full_summary = f"{info_block}\n\nModel: {model_name}\n\n{full_summary}"
    # Persist the summary so Gradio can offer it as a file download
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
    temp_file.write(full_summary)
    temp_file.close()
    return all_text, full_summary, temp_file.name
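# Helpers for the Q&A tab: render the chat history and toggle its visibility.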
def format_chat_history():
return "\n\n".join([f"Soru: {q}\nYanıt: {a}" for q, a in chat_history])
def toggle_history_display(show):
    return gr.update(visible=show, value=format_chat_history() if show else "")
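# Answer a question against the uploaded documents using the RAG index.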
def process_rag_query(query):
    if rag_engine is None:
        return "Önce metin dosyası yükleyin.", "", ""
    try:
        results = rag_engine.search(query, top_k=3)
        answer = generate_answer(query, results, chat_history=chat_history)
        chat_history.append((query, answer))
        sources = "\n\n---\n\n".join([f"Kaynak: {fname}" for fname, _ in results])
        history = format_chat_history()
    except Exception as e:
        answer = str(e)
        sources = "Hata oluştu."
        history = ""
    return answer, sources, history
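# Clear the stored Q&A history and blank the related output boxes.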
def reset_history():
    global chat_history
    chat_history = []
    return "", "", ""
with gr.Blocks() as demo:
gr.Markdown("## VizSum")
with gr.Tabs():
with gr.TabItem("Özetleme"):
with gr.Row():
pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
image_input = gr.Image(type="filepath", label="Görsel Yükle")
manual_input = gr.Textbox(lines=5, label="Metni Manuel Gir")
with gr.Row(visible=False) as page_controls:
start_page = gr.Number(label="Başlangıç Sayfası", value=1, minimum=1, precision=0)
end_page = gr.Number(label="Bitiş Sayfası", value=5, minimum=1, precision=0)
pdf_input.change(fn=lambda pdf: gr.update(visible=True), inputs=[pdf_input], outputs=[page_controls])
pdf_input.change(fn=lambda pdf: (gr.update(interactive=True), gr.update(interactive=True)), inputs=[pdf_input], outputs=[start_page, end_page])
mode_selector = gr.Dropdown(
choices=["Teknik Özet", "Sade Anlatım", "Eleştir ve Değerlendir", "Başlık Çıkar", "Not Formatı", "Karma Özet", "Chat Özeti (Yazışma/Not)"],
label="Özetleme Modu",
value="Teknik Özet"
)
model_selector = gr.Dropdown(
choices=["anthropic/claude-3-haiku", "openai/gpt-3.5-turbo", "mistralai/mistral-7b-instruct"],
label="Dil Modeli",
value="anthropic/claude-3-haiku"
)
lang_mode = gr.Radio(
choices=["Otomatik", "Sadece Türkçe", "Sadece İngilizce", "Türkçeye Çevir", "İngilizceye Çevir"],
label="Dil Algılama / Çeviri Modu",
value="Otomatik"
)
is_table = gr.Checkbox(label="Tablo içeriyor (Claude tablo gibi özetlesin)", value=False)
submit_btn = gr.Button("Özetle")
text_output = gr.Textbox(label="Giriş Metni")
summary_output = gr.Textbox(label="AI Özeti", lines=10, show_copy_button=True)
summary_file = gr.File(label="Özeti İndir", interactive=False)
submit_btn.click(
fn=process_input,
inputs=[pdf_input, image_input, manual_input, mode_selector, model_selector, start_page, end_page, lang_mode, is_table],
outputs=[text_output, summary_output, summary_file]
)
with gr.TabItem("Soru-Cevap (RAG)"):
doc_upload = gr.File(label="Dosya Yükle (.txt, .pdf, .docx, .jpg, .png)", file_types=[".txt", ".pdf", ".docx", ".jpg", ".png"], file_count="multiple")
upload_status = gr.Textbox(label="Yükleme Durumu")
doc_upload.change(fn=load_uploaded_docs, inputs=[doc_upload], outputs=[upload_status])
query_input = gr.Textbox(label="Soru", placeholder="Belgelerden bir şey sor...")
answer_output = gr.Textbox(label="Claude Yanıtı", lines=10)
source_output = gr.Textbox(label="Kaynaklar", lines=5, visible=False)
history_toggle = gr.Checkbox(label="Geçmişi Göster", value=False)
history_output = gr.Textbox(label="Soru-Cevap Geçmişi", lines=10, visible=False, interactive=False)
show_sources = gr.Checkbox(label="Kaynakları Göster", value=False)
rag_btn = gr.Button("Cevapla")
reset_btn = gr.Button("Geçmişi Sıfırla")
rag_btn.click(fn=process_rag_query, inputs=[query_input], outputs=[answer_output, source_output, history_output])
history_toggle.change(fn=toggle_history_display, inputs=[history_toggle], outputs=[history_output])
show_sources.change(fn=lambda visible: gr.update(visible=visible), inputs=[show_sources], outputs=[source_output])
reset_btn.click(fn=reset_history, inputs=[], outputs=[answer_output, source_output, history_output])
if __name__ == "__main__":
    demo.launch(share=True)