# Hugging Face Space — status: Running
import gradio as gr | |
import tempfile | |
import os | |
from ocr_engine import extract_text_from_image | |
from pdf_reader import extract_text_chunks_from_pdf | |
from summarizer import summarize_long_text, generate_answer | |
from utils import chunk_text_by_tokens | |
from core.rag.rag_engine import SimpleRAG | |
from docx import Document | |
# Module-level state shared by the RAG tab callbacks.
# chat_history holds (question, answer) tuples in the order they were asked.
chat_history = list()
# rag_engine is the active retrieval engine; None until documents are loaded.
rag_engine = None
def load_uploaded_docs(files):
    """Read the uploaded files and rebuild the global RAG index from them.

    Supported extensions are .txt, .pdf, .docx, .jpg, .jpeg and .png; any
    other extension is skipped. A file that fails to read is reported on
    stdout and skipped so that one bad upload does not abort the batch.

    Args:
        files: Uploaded items, each either an object exposing the local path
            as ``.name`` or a plain path string. May be None or empty.

    Returns:
        A user-facing status message.
    """
    global rag_engine
    if not files:
        return "Dosya yüklenmedi."

    docs = []
    for file in files:
        # Accept both upload wrappers (``.name`` holds the path) and bare
        # path strings, so the handler works with either File return type.
        path = getattr(file, "name", file)
        ext = os.path.splitext(path)[-1].lower()
        try:
            if ext == ".txt":
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            elif ext == ".pdf":
                content = "\n".join(extract_text_chunks_from_pdf(path))
            elif ext == ".docx":
                content = "\n".join(p.text for p in Document(path).paragraphs)
            elif ext in (".jpg", ".jpeg", ".png"):
                content = extract_text_from_image(path)
            else:
                content = ""
            if content.strip():
                docs.append((os.path.basename(path), content))
        except Exception as e:
            # Best effort: keep going with the remaining files.
            print("Dosya okuma hatası:", e)

    if not docs:
        # Never leave an empty, un-indexed engine behind: with None the query
        # handler asks the user to upload documents instead of searching it.
        rag_engine = None
        return "Dosyalar boş veya okunamadı."

    # Build the index on a local engine and publish it only once it is ready.
    engine = SimpleRAG()
    engine.docs = docs
    engine.build_index()
    rag_engine = engine
    return f"{len(docs)} dosya başarıyla yüklendi."
def extract_input_text(pdf, image, manual_text, start_page, end_page):
    """Pick the first provided input source and return its text.

    Sources are checked in order: PDF, image, manual text.

    Args:
        pdf: PDF input passed through to ``extract_text_chunks_from_pdf``.
        image: Image input passed through to ``extract_text_from_image``.
        manual_text: Text typed by the user; may be None or blank.
        start_page: First page of the PDF range (converted with ``int``).
        end_page: Last page of the PDF range (converted with ``int``).

    Returns:
        A ``(text, info_block, input_type)`` tuple. ``input_type`` is
        ``"pdf"``, ``"image"`` or ``"manual"`` on success; when it is None,
        ``text`` holds a message for the user instead of content.
        ``info_block`` is only set for PDFs.
    """
    if pdf:
        text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
        # Surface the chunk that actually carries the error marker; the first
        # chunk may be ordinary page text when the failure happened later.
        failed_chunk = next((chunk for chunk in text_chunks if "[ERROR]" in chunk), None)
        if failed_chunk is not None:
            return failed_chunk, None, None
        all_text = "\n\n".join(text_chunks)
        chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1000))
        info_block = f"Sayfa Aralığı: {start_page}–{end_page}\nChunk Sayısı: {chunk_count}"
        return all_text, info_block, "pdf"

    if image:
        text = extract_text_from_image(image)
        if "[ERROR]" in text:
            return text, None, None
        return text, None, "image"

    # manual_text can be None when the textbox value is missing.
    if manual_text and manual_text.strip():
        return manual_text, None, "manual"

    return "Lütfen bir giriş türü seçin.", None, None
def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
    """Summarize the selected input and write the summary to a download file.

    Args:
        pdf, image, manual_text: Candidate input sources, forwarded to
            ``extract_input_text``.
        mode: Summarization mode forwarded to ``summarize_long_text``.
        model_name: Model identifier; table content is only accepted for
            ``"anthropic/claude-3-haiku"``.
        start_page, end_page: Page range used for PDF input.
        lang_mode: Language option forwarded to ``summarize_long_text``.
        is_table: Whether the input should be treated as table content.

    Returns:
        A ``(input_text, summary, summary_path)`` tuple. When the request is
        rejected or extraction fails, the first element is the message,
        the summary is empty and the path is None.
    """
    if is_table and model_name != "anthropic/claude-3-haiku":
        return "Tablo içeriği için yalnızca Claude önerilir.", "", None

    all_text, info_block, input_type = extract_input_text(pdf, image, manual_text, start_page, end_page)
    if input_type is None:
        # all_text carries the error/prompt message in this case.
        return all_text, "", None

    full_summary = summarize_long_text(all_text, mode, model_name, lang_mode, is_table)
    if info_block:
        full_summary = f"{info_block}\n\nModel: {model_name}\n\n{full_summary}"

    # delete=False keeps the file on disk after closing so it can be offered
    # for download; the with-block closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as temp_file:
        temp_file.write(full_summary)
    return all_text, full_summary, temp_file.name
def format_chat_history():
    """Render the stored question/answer pairs as a single display string.

    Each pair becomes a "Soru/Yanıt" entry; entries are separated by a blank
    line. Returns an empty string when there is no history.
    """
    entries = []
    for question, reply in chat_history:
        entries.append(f"Soru: {question}\nYanıt: {reply}")
    return "\n\n".join(entries)
def toggle_history_display(show):
    """Show the history box filled with the current history, or hide and empty it.

    Args:
        show: Whether the history box should be visible.

    Returns:
        A Gradio update setting the component's visibility and value.
    """
    if show:
        return gr.update(visible=show, value=format_chat_history())
    return gr.update(visible=show, value="")
def process_rag_query(query):
    """Answer a question from the indexed documents and record the exchange.

    Args:
        query: The user's question.

    Returns:
        An ``(answer, sources, history)`` tuple of display strings. On
        failure ``answer`` holds the error text and ``sources`` a short
        error notice; ``history`` still shows the exchanges recorded so far.
    """
    if rag_engine is None:
        return "Önce metin dosyası yükleyin.", "", ""

    # Do not spend a model call on, or log, an empty question.
    if not query or not query.strip():
        return "Lütfen bir soru girin.", "", format_chat_history()

    try:
        # Retrieval sits inside the try so a search failure is reported in
        # the UI the same way as a generation failure.
        results = rag_engine.search(query, top_k=3)
        answer = generate_answer(query, results, chat_history=chat_history)
        sources = "\n\n---\n\n".join([f"Kaynak: {fname}" for fname, _ in results])
    except Exception as e:
        # Keep earlier exchanges visible instead of blanking the history box.
        return str(e), "Hata oluştu.", format_chat_history()

    chat_history.append((query, answer))
    return answer, sources, format_chat_history()
def reset_history(): | |
global chat_history | |
chat_history = [] | |
return "", "", "" | |
# UI definition: one tab for summarization, one for document Q&A (RAG).
with gr.Blocks() as demo:
    gr.Markdown("## VizSum")
    with gr.Tabs():
        with gr.TabItem("Özetleme"):
            # Input sources; extract_input_text uses the first one provided
            # (PDF, then image, then manual text).
            with gr.Row():
                pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
                image_input = gr.Image(type="filepath", label="Görsel Yükle")
                manual_input = gr.Textbox(lines=5, label="Metni Manuel Gir")
            # Page-range controls are only relevant while a PDF is selected.
            with gr.Row(visible=False) as page_controls:
                start_page = gr.Number(label="Başlangıç Sayfası", value=1, minimum=1, precision=0)
                end_page = gr.Number(label="Bitiş Sayfası", value=5, minimum=1, precision=0)
            # Show the row when a PDF is present and hide it again when the
            # upload is cleared (the change event then delivers None).
            pdf_input.change(
                fn=lambda pdf: gr.update(visible=pdf is not None),
                inputs=[pdf_input],
                outputs=[page_controls],
            )
            pdf_input.change(
                fn=lambda pdf: (gr.update(interactive=True), gr.update(interactive=True)),
                inputs=[pdf_input],
                outputs=[start_page, end_page],
            )
            mode_selector = gr.Dropdown(
                choices=["Teknik Özet", "Sade Anlatım", "Eleştir ve Değerlendir", "Başlık Çıkar", "Not Formatı", "Karma Özet", "Chat Özeti (Yazışma/Not)"],
                label="Özetleme Modu",
                value="Teknik Özet",
            )
            model_selector = gr.Dropdown(
                choices=["anthropic/claude-3-haiku", "openai/gpt-3.5-turbo", "mistralai/mistral-7b-instruct"],
                label="Dil Modeli",
                value="anthropic/claude-3-haiku",
            )
            lang_mode = gr.Radio(
                choices=["Otomatik", "Sadece Türkçe", "Sadece İngilizce", "Türkçeye Çevir", "İngilizceye Çevir"],
                label="Dil Algılama / Çeviri Modu",
                value="Otomatik",
            )
            is_table = gr.Checkbox(label="Tablo içeriyor (Claude tablo gibi özetlesin)", value=False)
            submit_btn = gr.Button("Özetle")
            text_output = gr.Textbox(label="Giriş Metni")
            summary_output = gr.Textbox(label="AI Özeti", lines=10, show_copy_button=True)
            summary_file = gr.File(label="Özeti İndir", interactive=False)
            submit_btn.click(
                fn=process_input,
                inputs=[pdf_input, image_input, manual_input, mode_selector, model_selector, start_page, end_page, lang_mode, is_table],
                outputs=[text_output, summary_output, summary_file],
            )
        with gr.TabItem("Soru-Cevap (RAG)"):
            # ".jpeg" is listed because load_uploaded_docs accepts it too.
            doc_upload = gr.File(
                label="Dosya Yükle (.txt, .pdf, .docx, .jpg, .png)",
                file_types=[".txt", ".pdf", ".docx", ".jpg", ".jpeg", ".png"],
                file_count="multiple",
            )
            upload_status = gr.Textbox(label="Yükleme Durumu")
            doc_upload.change(fn=load_uploaded_docs, inputs=[doc_upload], outputs=[upload_status])
            query_input = gr.Textbox(label="Soru", placeholder="Belgelerden bir şey sor...")
            answer_output = gr.Textbox(label="Claude Yanıtı", lines=10)
            source_output = gr.Textbox(label="Kaynaklar", lines=5, visible=False)
            history_toggle = gr.Checkbox(label="Geçmişi Göster", value=False)
            history_output = gr.Textbox(label="Soru-Cevap Geçmişi", lines=10, visible=False, interactive=False)
            show_sources = gr.Checkbox(label="Kaynakları Göster", value=False)
            rag_btn = gr.Button("Cevapla")
            reset_btn = gr.Button("Geçmişi Sıfırla")
            rag_btn.click(fn=process_rag_query, inputs=[query_input], outputs=[answer_output, source_output, history_output])
            history_toggle.change(fn=toggle_history_display, inputs=[history_toggle], outputs=[history_output])
            show_sources.change(fn=lambda visible: gr.update(visible=visible), inputs=[show_sources], outputs=[source_output])
            reset_btn.click(fn=reset_history, inputs=[], outputs=[answer_output, source_output, history_output])

if __name__ == "__main__":
    demo.launch(share=True)