import gradio as gr
import tempfile
import os
from ocr_engine import extract_text_from_image
from pdf_reader import extract_text_chunks_from_pdf
from summarizer import summarize_long_text, generate_answer
from utils import chunk_text_by_tokens
from core.rag.rag_engine import SimpleRAG
from docx import Document
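# Module-level state shared across Gradio callbacks:
# chat_history keeps (question, answer) pairs, rag_engine holds the lazily built RAG index.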
chat_history = []
rag_engine = None
def load_uploaded_docs(files):
    """Read each uploaded file, extract its text and build the RAG index."""
    global rag_engine
    if not files:
        return "Dosya yüklenmedi."
    rag_engine = SimpleRAG()
    docs = []
    for file in files:
        ext = os.path.splitext(file.name)[-1].lower()
        try:
            if ext == ".txt":
                with open(file.name, "r", encoding="utf-8") as f:
                    content = f.read()
            elif ext == ".pdf":
                chunks = extract_text_chunks_from_pdf(file.name)
                content = "\n".join(chunks)
            elif ext == ".docx":
                doc = Document(file.name)
                content = "\n".join([p.text for p in doc.paragraphs])
            elif ext in [".jpg", ".jpeg", ".png"]:
                content = extract_text_from_image(file.name)
            else:
                content = ""
            if content.strip():
                docs.append((os.path.basename(file.name), content))
        except Exception as e:
            print("Dosya okuma hatası:", e)
    if not docs:
        return "Dosyalar boş veya okunamadı."
    rag_engine.docs = docs
    rag_engine.build_index()
    return f"{len(docs)} dosya başarıyla yüklendi."
def extract_input_text(pdf, image, manual_text, start_page, end_page):
    if pdf:
        text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
        if any("[ERROR]" in chunk for chunk in text_chunks):
            # Surface the error chunk itself rather than blindly returning the first chunk
            return next(c for c in text_chunks if "[ERROR]" in c), None, None
        all_text = "\n\n".join(text_chunks)
        chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1000))
        info_block = f"Sayfa Aralığı: {start_page}–{end_page}\nChunk Sayısı: {chunk_count}"
        return all_text, info_block, "pdf"
    elif image:
        text = extract_text_from_image(image)
        if "[ERROR]" in text:
            return text, None, None
        return text, None, "image"
    elif manual_text and manual_text.strip():
        return manual_text, None, "manual"
    else:
        return "Lütfen bir giriş türü seçin.", None, None
def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
    # Table-style content is only supported with the Claude model
    if is_table and model_name != "anthropic/claude-3-haiku":
        return "Tablo içeriği için yalnızca Claude önerilir.", "", None
    all_text, info_block, input_type = extract_input_text(pdf, image, manual_text, start_page, end_page)
    if input_type is None:
        return all_text, "", None
    full_summary = summarize_long_text(all_text, mode, model_name, lang_mode, is_table)
    if info_block:
        full_summary = f"{info_block}\n\nModel: {model_name}\n\n{full_summary}"
    # Persist the summary so Gradio can offer it as a file download
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
    temp_file.write(full_summary)
    temp_file.close()
    return all_text, full_summary, temp_file.name
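# Helpers for the Q&A tab: render the chat history and toggle its visibility.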
def format_chat_history():
return "\n\n".join([f"Soru: {q}\nYanıt: {a}" for q, a in chat_history])
def toggle_history_display(show):
    return gr.update(visible=show, value=format_chat_history() if show else "")
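# Answer a question against the uploaded documents using the RAG index.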
def process_rag_query(query):
    if rag_engine is None:
        return "Önce metin dosyası yükleyin.", "", ""
    try:
        results = rag_engine.search(query, top_k=3)
        answer = generate_answer(query, results, chat_history=chat_history)
        chat_history.append((query, answer))
        sources = "\n\n---\n\n".join([f"Kaynak: {fname}" for fname, _ in results])
        history = format_chat_history()
    except Exception as e:
        answer = str(e)
        sources = "Hata oluştu."
        history = ""
    return answer, sources, history
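# Clear the stored Q&A history and blank the related output boxes.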
def reset_history():
    global chat_history
    chat_history = []
    return "", "", ""
with gr.Blocks() as demo:
gr.Markdown("## VizSum")
with gr.Tabs():
with gr.TabItem("Özetleme"):
with gr.Row():
pdf_input = gr.File(label="PDF Yükle", file_types=[".pdf"])
image_input = gr.Image(type="filepath", label="Görsel Yükle")
manual_input = gr.Textbox(lines=5, label="Metni Manuel Gir")
with gr.Row(visible=False) as page_controls:
start_page = gr.Number(label="Başlangıç Sayfası", value=1, minimum=1, precision=0)
end_page = gr.Number(label="Bitiş Sayfası", value=5, minimum=1, precision=0)
pdf_input.change(fn=lambda pdf: gr.update(visible=True), inputs=[pdf_input], outputs=[page_controls])
pdf_input.change(fn=lambda pdf: (gr.update(interactive=True), gr.update(interactive=True)), inputs=[pdf_input], outputs=[start_page, end_page])
mode_selector = gr.Dropdown(
choices=["Teknik Özet", "Sade Anlatım", "Eleştir ve Değerlendir", "Başlık Çıkar", "Not Formatı", "Karma Özet", "Chat Özeti (Yazışma/Not)"],
label="Özetleme Modu",
value="Teknik Özet"
)
model_selector = gr.Dropdown(
choices=["anthropic/claude-3-haiku", "openai/gpt-3.5-turbo", "mistralai/mistral-7b-instruct"],
label="Dil Modeli",
value="anthropic/claude-3-haiku"
)
lang_mode = gr.Radio(
choices=["Otomatik", "Sadece Türkçe", "Sadece İngilizce", "Türkçeye Çevir", "İngilizceye Çevir"],
label="Dil Algılama / Çeviri Modu",
value="Otomatik"
)
is_table = gr.Checkbox(label="Tablo içeriyor (Claude tablo gibi özetlesin)", value=False)
submit_btn = gr.Button("Özetle")
text_output = gr.Textbox(label="Giriş Metni")
summary_output = gr.Textbox(label="AI Özeti", lines=10, show_copy_button=True)
summary_file = gr.File(label="Özeti İndir", interactive=False)
submit_btn.click(
fn=process_input,
inputs=[pdf_input, image_input, manual_input, mode_selector, model_selector, start_page, end_page, lang_mode, is_table],
outputs=[text_output, summary_output, summary_file]
)
with gr.TabItem("Soru-Cevap (RAG)"):
doc_upload = gr.File(label="Dosya Yükle (.txt, .pdf, .docx, .jpg, .png)", file_types=[".txt", ".pdf", ".docx", ".jpg", ".png"], file_count="multiple")
upload_status = gr.Textbox(label="Yükleme Durumu")
doc_upload.change(fn=load_uploaded_docs, inputs=[doc_upload], outputs=[upload_status])
query_input = gr.Textbox(label="Soru", placeholder="Belgelerden bir şey sor...")
answer_output = gr.Textbox(label="Claude Yanıtı", lines=10)
source_output = gr.Textbox(label="Kaynaklar", lines=5, visible=False)
history_toggle = gr.Checkbox(label="Geçmişi Göster", value=False)
history_output = gr.Textbox(label="Soru-Cevap Geçmişi", lines=10, visible=False, interactive=False)
show_sources = gr.Checkbox(label="Kaynakları Göster", value=False)
rag_btn = gr.Button("Cevapla")
reset_btn = gr.Button("Geçmişi Sıfırla")
rag_btn.click(fn=process_rag_query, inputs=[query_input], outputs=[answer_output, source_output, history_output])
history_toggle.change(fn=toggle_history_display, inputs=[history_toggle], outputs=[history_output])
show_sources.change(fn=lambda visible: gr.update(visible=visible), inputs=[show_sources], outputs=[source_output])
reset_btn.click(fn=reset_history, inputs=[], outputs=[answer_output, source_output, history_output])
if __name__ == "__main__":
    demo.launch(share=True)