import os
import tempfile

import gradio as gr
from docx import Document

from core.rag.rag_engine import SimpleRAG
from ocr_engine import extract_text_from_image
from pdf_reader import extract_text_chunks_from_pdf
from summarizer import summarize_long_text, generate_answer
from utils import chunk_text_by_tokens

# Conversation memory for the question-answering tab: (question, answer) pairs.
chat_history = []

# Retrieval engine over the uploaded documents; None until an upload succeeds.
rag_engine = None

# Image extensions whose text is obtained through OCR.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _read_document_text(path):
    """Return the text of the file at *path*, dispatching on its extension.

    Supported: ``.txt`` (read as UTF-8), ``.pdf`` (chunks joined with
    newlines), ``.docx`` (paragraph texts joined with newlines) and
    ``.jpg``/``.jpeg``/``.png`` (OCR). Any other extension yields ``""``.
    """
    ext = os.path.splitext(path)[-1].lower()
    if ext == ".txt":
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    if ext == ".pdf":
        return "\n".join(extract_text_chunks_from_pdf(path))
    if ext == ".docx":
        return "\n".join(p.text for p in Document(path).paragraphs)
    if ext in _IMAGE_EXTENSIONS:
        # NOTE(review): elsewhere in this file an "[ERROR]" marker in OCR/PDF
        # output is treated as a failure; here such text would be indexed
        # as-is. Confirm whether it should be skipped.
        return extract_text_from_image(path)
    return ""


def load_uploaded_docs(files):
    """Read the uploaded files and (re)build the global RAG index from them.

    Args:
        files: Uploaded files as delivered by the upload widget. Each item may
            be an object exposing the path as ``.name`` or a plain path
            string; both are accepted.

    Returns:
        A status message for the UI (in Turkish).

    Side effects:
        On success ``rag_engine`` is replaced by a freshly indexed engine.
        When files were supplied but none yielded text, ``rag_engine`` is
        reset to ``None`` so that queries are rejected instead of hitting an
        engine without an index. When nothing was supplied at all, the
        current engine is left untouched.
    """
    global rag_engine

    if not files:
        return "Dosya yüklenmedi."

    docs = []
    for file in files:
        # Accept both file-like wrappers (``.name``) and plain path strings.
        path = getattr(file, "name", file)
        try:
            content = _read_document_text(path)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print("Dosya okuma hatası:", e)
            continue
        if content and content.strip():
            docs.append((os.path.basename(path), content))

    if not docs:
        rag_engine = None
        return "Dosyalar boş veya okunamadı."

    # Build into a local first; publish only once the index is ready so the
    # global never refers to a half-initialised engine.
    engine = SimpleRAG()
    engine.docs = docs
    engine.build_index()
    rag_engine = engine
    return f"{len(docs)} dosya başarıyla yüklendi."
# The only model accepted by process_input when "contains a table" is ticked.
_TABLE_MODEL = "anthropic/claude-3-haiku"


def extract_input_text(pdf, image, manual_text, start_page, end_page):
    """Pick the first available input source and return its text.

    Priority is PDF, then image (OCR), then manually typed text.

    Args:
        pdf: Uploaded PDF (falsy when none was provided).
        image: Path of the uploaded image (falsy when none was provided).
        manual_text: Text typed by the user; ``None`` is treated as empty.
        start_page: First PDF page to read (converted with ``int``).
        end_page: Last PDF page to read (converted with ``int``).

    Returns:
        A ``(text, info_block, input_type)`` tuple. ``input_type`` is
        ``"pdf"``, ``"image"`` or ``"manual"`` on success; on failure it is
        ``None`` and ``text`` carries the message to display. ``info_block``
        is only set for PDFs (page range and chunk count).
    """
    if pdf:
        text_chunks = extract_text_chunks_from_pdf(
            pdf, start=int(start_page), end=int(end_page)
        )
        # Report the chunk that actually carries the error marker, not just
        # whichever chunk happens to be first.
        error_chunk = next(
            (chunk for chunk in text_chunks if "[ERROR]" in chunk), None
        )
        if error_chunk is not None:
            return error_chunk, None, None

        all_text = "\n\n".join(text_chunks)
        chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1000))
        info_block = (
            f"Sayfa Aralığı: {start_page}–{end_page}\nChunk Sayısı: {chunk_count}"
        )
        return all_text, info_block, "pdf"

    if image:
        text = extract_text_from_image(image)
        if "[ERROR]" in text:
            return text, None, None
        return text, None, "image"

    if manual_text and manual_text.strip():
        return manual_text, None, "manual"

    return "Lütfen bir giriş türü seçin.", None, None


def process_input(pdf, image, manual_text, mode, model_name, start_page,
                  end_page, lang_mode, is_table):
    """Summarize the selected input and write the summary to a download file.

    Returns:
        ``(input_text, summary, file_path)``. When the request is rejected or
        text extraction fails, the first element holds the message, the
        summary is ``""`` and ``file_path`` is ``None``.
    """
    if is_table and model_name != _TABLE_MODEL:
        return "Tablo içeriği için yalnızca Claude önerilir.", "", None

    all_text, info_block, input_type = extract_input_text(
        pdf, image, manual_text, start_page, end_page
    )
    if input_type is None:
        return all_text, "", None

    full_summary = summarize_long_text(
        all_text, mode, model_name, lang_mode, is_table
    )
    if info_block:
        full_summary = f"{info_block}\n\nModel: {model_name}\n\n{full_summary}"

    # delete=False: the file must outlive this call so the UI can serve it.
    # The context manager guarantees the handle is closed even if write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as temp_file:
        temp_file.write(full_summary)

    return all_text, full_summary, temp_file.name


def format_chat_history():
    """Render ``chat_history`` as blank-line separated question/answer text."""
    return "\n\n".join(f"Soru: {q}\nYanıt: {a}" for q, a in chat_history)


def toggle_history_display(show):
    """Show the history box filled with the current history, or hide and clear it."""
    return gr.update(visible=show, value=format_chat_history() if show else "")


def process_rag_query(query):
    """Answer *query* from the indexed documents.

    Returns:
        ``(answer, sources, history)`` strings for the three output boxes.
        On failure ``answer`` is the exception text and ``sources`` is an
        error note; the history shown always reflects ``chat_history``.
    """
    if rag_engine is None:
        return "Önce metin dosyası yükleyin.", "", format_chat_history()

    try:
        # Retrieval is inside the try as well, so a search failure is reported
        # in the answer box rather than escaping the handler.
        results = rag_engine.search(query, top_k=3)
        answer = generate_answer(query, results, chat_history=chat_history)
        chat_history.append((query, answer))
        sources = "\n\n---\n\n".join(f"Kaynak: {fname}" for fname, _ in results)
    except Exception as e:
        # Earlier exchanges are still stored; keep them visible.
        return str(e), "Hata oluştu.", format_chat_history()

    return answer, sources, format_chat_history()


def reset_history():
    """Forget all stored exchanges and blank the answer, sources and history boxes."""
    chat_history.clear()
    return "", "", ""


def _on_pdf_change(pdf):
    """Show the page-range row only while a PDF is loaded; keep its fields editable."""
    return (
        gr.update(visible=bool(pdf)),
        gr.update(interactive=True),
        gr.update(interactive=True),
    )


with gr.Blocks() as demo:
    gr.Markdown("## VizSum")

    with gr.Tabs():
        # ---- Summarization tab ------------------------------------------
        with gr.TabItem("Özetleme"):
            with gr.Row():
                pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
                image_input = gr.Image(type="filepath", label="Görsel Yükle")
                manual_input = gr.Textbox(lines=5, label="Metni Manuel Gir")

            # Hidden until a PDF is uploaded (see _on_pdf_change).
            with gr.Row(visible=False) as page_controls:
                start_page = gr.Number(label="Başlangıç Sayfası", value=1, minimum=1, precision=0)
                end_page = gr.Number(label="Bitiş Sayfası", value=5, minimum=1, precision=0)

            pdf_input.change(
                fn=_on_pdf_change,
                inputs=[pdf_input],
                outputs=[page_controls, start_page, end_page],
            )

            mode_selector = gr.Dropdown(
                choices=["Teknik Özet", "Sade Anlatım", "Eleştir ve Değerlendir", "Başlık Çıkar", "Not Formatı", "Karma Özet", "Chat Özeti (Yazışma/Not)"],
                label="Özetleme Modu",
                value="Teknik Özet"
            )
            model_selector = gr.Dropdown(
                choices=["anthropic/claude-3-haiku", "openai/gpt-3.5-turbo", "mistralai/mistral-7b-instruct"],
                label="Dil Modeli",
                value="anthropic/claude-3-haiku"
            )
            lang_mode = gr.Radio(
                choices=["Otomatik", "Sadece Türkçe", "Sadece İngilizce", "Türkçeye Çevir", "İngilizceye Çevir"],
                label="Dil Algılama / Çeviri Modu",
                value="Otomatik"
            )
            is_table = gr.Checkbox(label="Tablo içeriyor (Claude tablo gibi özetlesin)", value=False)

            submit_btn = gr.Button("Özetle")
            text_output = gr.Textbox(label="Giriş Metni")
            summary_output = gr.Textbox(label="AI Özeti", lines=10, show_copy_button=True)
            summary_file = gr.File(label="Özeti İndir", interactive=False)

            submit_btn.click(
                fn=process_input,
                inputs=[pdf_input, image_input, manual_input, mode_selector, model_selector, start_page, end_page, lang_mode, is_table],
                outputs=[text_output, summary_output, summary_file]
            )

        # ---- Question answering (RAG) tab -------------------------------
        with gr.TabItem("Soru-Cevap (RAG)"):
            doc_upload = gr.File(
                label="Dosya Yükle (.txt, .pdf, .docx, .jpg, .png)",
                # ".jpeg" is included because the document loader handles it too.
                file_types=[".txt", ".pdf", ".docx", ".jpg", ".jpeg", ".png"],
                file_count="multiple",
            )
            upload_status = gr.Textbox(label="Yükleme Durumu")
            doc_upload.change(fn=load_uploaded_docs, inputs=[doc_upload], outputs=[upload_status])

            query_input = gr.Textbox(label="Soru", placeholder="Belgelerden bir şey sor...")
            answer_output = gr.Textbox(label="Claude Yanıtı", lines=10)
            source_output = gr.Textbox(label="Kaynaklar", lines=5, visible=False)
            history_toggle = gr.Checkbox(label="Geçmişi Göster", value=False)
            history_output = gr.Textbox(label="Soru-Cevap Geçmişi", lines=10, visible=False, interactive=False)
            show_sources = gr.Checkbox(label="Kaynakları Göster", value=False)

            rag_btn = gr.Button("Cevapla")
            reset_btn = gr.Button("Geçmişi Sıfırla")

            rag_btn.click(fn=process_rag_query, inputs=[query_input], outputs=[answer_output, source_output, history_output])
            history_toggle.change(fn=toggle_history_display, inputs=[history_toggle], outputs=[history_output])
            show_sources.change(fn=lambda visible: gr.update(visible=visible), inputs=[show_sources], outputs=[source_output])
            reset_btn.click(fn=reset_history, inputs=[], outputs=[answer_output, source_output, history_output])


if __name__ == "__main__":
    # NOTE: share=True asks Gradio for a publicly reachable link to this app.
    demo.launch(share=True)