Spaces:

Vartex39
/

vizsum-pro

Sleeping

Vartex39 committed on May 27

Commit

c4e9c8e

1 Parent(s): 189b51b

Update: Chunklama destekli özetleme eklendi

Files changed (5) hide show

.env.example CHANGED Viewed

	@@ -1 +1 @@
1	- OPENROUTER_API_KEY=sk-or-v1-~~<redacted>~~


1	+ OPENROUTER_API_KEY=sk-or-v1-<your-openrouter-api-key>

.py ADDED Viewed

+from dotenv import load_dotenv
+import os
+
+# Debug helper: confirm that OPENROUTER_API_KEY is picked up after load_dotenv().
+# Only presence and length are reported so the secret itself never reaches
+# stdout or any captured log.
+load_dotenv()
+_key = os.getenv("OPENROUTER_API_KEY")
+print("KEY", f"set ({len(_key)} chars)" if _key else "missing")

summarizer.py CHANGED Viewed

@@ -1,8 +1,12 @@
 import os
 import requests
 from dotenv import load_dotenv
 load_dotenv()
 api_key = os.getenv("OPENROUTER_API_KEY")
 if not api_key or not api_key.strip():
     raise RuntimeError("❌ OPENROUTER_API_KEY bulunamadı. Hugging Face Secrets kısmına eklenmeli.")
@@ -37,6 +41,16 @@ Aşağıdaki metni 3 ayrı biçimde özetle:
         instruction = "Metne uygun başlık önerileri üret."
     elif "Not" in mode:
         instruction = "Bu metinden önemli notlar çıkar."
     else:
         instruction = "Metni kısa ve teknik bir şekilde özetle."
@@ -67,3 +81,13 @@ def summarize_text(text, mode, model_name="anthropic/claude-3-haiku", lang_mode=
         return f"❌ HTTP Hatası: {e} | Yanıt: {response.text}"
     except Exception as e:
         return f"❌ Sistemsel Hata: {str(e)}"

 import os
 import requests
 from dotenv import load_dotenv
+from utils import chunk_text_by_tokens  # type: ignore
 load_dotenv()
+print("KEY IN SUMMARIZER:", os.getenv("OPENROUTER_API_KEY"))  # <<< bunu ekle
+api_key = os.getenv("OPENROUTER_API_KEY")
 api_key = os.getenv("OPENROUTER_API_KEY")
 if not api_key or not api_key.strip():
     raise RuntimeError("❌ OPENROUTER_API_KEY bulunamadı. Hugging Face Secrets kısmına eklenmeli.")
         instruction = "Metne uygun başlık önerileri üret."
     elif "Not" in mode:
         instruction = "Bu metinden önemli notlar çıkar."
+    elif "Chat" in mode:
+        instruction = """
+Aşağıdaki yazışmalıarı veya serbest notları oku ve şunları çıkar:
+- Ana konuşma başlıkları
+- Varsa karar verilen noktalar
+- Belirgin fikir veya öneriler
+Yazım sade ve maddeli olsun.
+"""
     else:
         instruction = "Metni kısa ve teknik bir şekilde özetle."
         return f"❌ HTTP Hatası: {e} | Yanıt: {response.text}"
     except Exception as e:
         return f"❌ Sistemsel Hata: {str(e)}"
+def summarize_long_text(text, mode, model_name="anthropic/claude-3-haiku", lang_mode="Otomatik", is_table=False):
+    """Summarize a long text piece by piece and join the partial results.
+
+    The text is split with ``chunk_text_by_tokens(text, max_tokens=1300)``;
+    each piece is passed to ``summarize_text`` with the same mode, model,
+    language mode and table flag. The per-piece results are joined with a
+    blank line between them, in their original order.
+    """
+    return "\n\n".join(
+        summarize_text(piece, mode, model_name, lang_mode, is_table)
+        for piece in chunk_text_by_tokens(text, max_tokens=1300)
+    )

ui.py CHANGED Viewed

@@ -2,11 +2,13 @@ import gradio as gr
 import tempfile
 from ocr_engine import extract_text_from_image
 from pdf_reader import extract_text_chunks_from_pdf
-from summarizer import summarize_text
 def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
     if is_table and model_name != "anthropic/claude-3-haiku":
         return "Tablo içeriği için yalnızca Claude önerilir.","",None
     if pdf is not None:
         text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
         if any("[ERROR]" in chunk for chunk in text_chunks):
@@ -22,13 +24,16 @@ def process_input(pdf, image, manual_text, mode, model_name, start_page, end_pag
         return "Lütfen bir giriş türü seçin.", "", None
     all_text = "\n\n".join(text_chunks)
-    summaries = []
-    for chunk in text_chunks:
-        summary = summarize_text(chunk, mode, model_name, lang_mode, is_table)
-        summaries.append(summary)
-    full_summary = "\n\n".join(summaries)
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode='w', encoding='utf-8')
     temp_file.write(full_summary)
@@ -36,6 +41,7 @@ def process_input(pdf, image, manual_text, mode, model_name, start_page, end_pag
     return all_text, full_summary, temp_file.name
 with gr.Blocks() as demo:
     gr.Markdown("## VizSum")

 import tempfile
 from ocr_engine import extract_text_from_image
 from pdf_reader import extract_text_chunks_from_pdf
+from summarizer import summarize_long_text
+from utils import chunk_text_by_tokens
 def process_input(pdf, image, manual_text, mode, model_name, start_page, end_page, lang_mode, is_table):
     if is_table and model_name != "anthropic/claude-3-haiku":
         return "Tablo içeriği için yalnızca Claude önerilir.","",None
     if pdf is not None:
         text_chunks = extract_text_chunks_from_pdf(pdf, start=int(start_page), end=int(end_page))
         if any("[ERROR]" in chunk for chunk in text_chunks):
         return "Lütfen bir giriş türü seçin.", "", None
     all_text = "\n\n".join(text_chunks)
+    chunk_count = len(chunk_text_by_tokens(all_text, max_tokens=1300))
+    info_block = f"""
+     Sayfa Aralığı: {start_page}–{end_page}
+     Model: {model_name}
+     Chunk Sayısı: {chunk_count}
+""".strip()
+    full_summary = summarize_long_text(all_text, mode, model_name, lang_mode, is_table)
+    full_summary = f"{info_block}\n\n{full_summary}"
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode='w', encoding='utf-8')
     temp_file.write(full_summary)
     return all_text, full_summary, temp_file.name
 with gr.Blocks() as demo:
     gr.Markdown("## VizSum")

utils.py ADDED Viewed

+def chunk_text_by_tokens(text, max_tokens=1300, approx_tokens_per_word=1.3):
+    """Split text into chunks sized by an approximate token budget.
+
+    The text is split on whitespace, and words are regrouped into chunks of
+    at most ``int(max_tokens / approx_tokens_per_word)`` words, joined by
+    single spaces (original whitespace and line breaks are not preserved).
+
+    Args:
+        text (str): The long text to split.
+        max_tokens (int): Estimated token limit for each chunk.
+        approx_tokens_per_word (float): Average number of tokens per word.
+
+    Returns:
+        List[str]: Chunks in original order; empty list for empty or
+        whitespace-only text.
+    """
+    words = text.split()
+    # Clamp to at least one word per chunk: a budget smaller than one word
+    # would otherwise give a zero step (range() raises ValueError) or a
+    # negative step (range() yields nothing and the text is silently dropped).
+    max_words = max(1, int(max_tokens / approx_tokens_per_word))
+    return [
+        " ".join(words[start:start + max_words])
+        for start in range(0, len(words), max_words)
+    ]