Spaces:

yunuseduran
/

chatpdf

Sleeping

App Files Files Community

yunuseduran committed on Apr 18

Commit

a974597

verified ·

1 Parent(s): 702c6ed

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -191

app.py CHANGED Viewed

@@ -1,244 +1,199 @@
 import gradio as gr
 import google.generativeai as genai
 import markdown
 from docx import Document
 from bs4 import BeautifulSoup
-import shutil
-import os
-import PyPDF2
 import tempfile
 from datetime import datetime
-# API anahtarı yapılandırması
 def setup_api_key():
     """Configure the Gemini client from the ``GOOGLE_API_KEY`` environment variable.

     Raises:
         ValueError: If the variable is unset or empty.
     """
     api_key = os.environ.get("GOOGLE_API_KEY")
     if api_key:
         genai.configure(api_key=api_key)
         return
     raise ValueError("GOOGLE_API_KEY çevre değişkeni ayarlanmamış.")
-# Dosya yükleme fonksiyonu
def upload_file(file_path):
    """Upload the file at ``file_path`` with ``genai.upload_file`` and return its result.

    Raises:
        Exception: On any failure, with a message prefixed by
            "Dosya yükleme hatası: ".
    """
    try:
        return genai.upload_file(path=file_path)
    except Exception as err:
        raise Exception(f"Dosya yükleme hatası: {err}")
-# Markdown formatına dönüştürme
def to_markdown(text):
    """Swap "•" bullets for "  *" and pass the result through ``markdown.markdown``."""
    with_list_markers = "  *".join(text.split('•'))
    return markdown.markdown(with_list_markers)
-# AI modelini oluşturma
def build_model(text_file):
    """Open a Gemini chat session and request a short summary of ``text_file``.

    Args:
        text_file: Value sent together with the first prompt.

    Returns:
        A ``(chat_session, summary_text)`` tuple, where ``summary_text`` is the
        ``.text`` of the reply to the opening summary request.
    """
    system_prompt = """PDF belgesinden yüklenen bilgilere dayanarak soruları cevapla.
        Belge içinde ilgili bilgi yoksa 'Bu konuda belgede bilgi bulamadım.' diye yanıtla.
        Cevaplarında mümkün olduğunca belgedeki bilgileri referans ver ve doğru bilgi sağla."""
    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=dict(
            temperature=0.2,
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
            response_mime_type="text/plain",
        ),
        system_instruction=system_prompt,
    )
    session = model.start_chat(history=[])
    # The opening message doubles as the summary request for the document.
    opening_reply = session.send_message(["Bu belgeyi kısaca özetle", text_file])
    return session, opening_reply.text
-# Sohbet fonksiyonu
def chat(chat_session, prompt):
    """Send ``prompt`` through ``chat_session`` and return the reply text.

    Any exception is converted into a string prefixed with
    "Yanıt alınamadı: " instead of being raised.
    """
    try:
        reply_text = chat_session.send_message(prompt).text
    except Exception as err:
        return f"Yanıt alınamadı: {err}"
    return reply_text
-# Rapor oluşturma
def generate_report(chat_session, questions, summary):
    """Assemble the Markdown report: title, timestamp, summary, then one section per question.

    Blank questions are skipped but still consume a number, because numbering
    follows each question's position in ``questions``. Each answer comes from
    ``chat(chat_session, question)``.
    """
    created_at = datetime.now().strftime('%d.%m.%Y %H:%M')
    sections = [
        "# PDF Belge Analiz Raporu\n\n",
        f"*Oluşturulma tarihi: {created_at}*\n\n",
        f"## Belge Özeti\n\n{summary}\n\n",
        "## Soru ve Cevaplar\n\n",
    ]
    for number, question in enumerate(questions, 1):
        if question.strip():
            sections.append(f"### Soru {number}: {question}\n\n")
            sections.append(f"{chat(chat_session, question)}\n\n")
    return "".join(sections)
-# Markdown'ı HTML'e dönüştürme
def convert_markdown_to_html(report_text):
    """Run ``report_text`` through ``markdown.markdown`` (tables extension) and wrap it in a styled ``<div>``."""
    rendered = markdown.markdown(report_text, extensions=['tables'])
    wrapper_open = (
        '\n    <div style="font-family: Arial, sans-serif; line-height: 1.6; '
        'max-width: 800px; margin: 0 auto; padding: 20px;">\n        '
    )
    wrapper_close = "\n    </div>\n    "
    return wrapper_open + rendered + wrapper_close
-# HTML'i Word belgesine ekleme
def add_html_to_word(html_text, doc):
    """Copy headings, paragraphs and list items found in ``html_text`` into ``doc``.

    Headings keep their level, non-blank ``<p>`` elements become paragraphs, and
    the direct ``<li>`` children of ``<ul>``/``<ol>`` become 'List Bullet' /
    'List Number' paragraphs. ``<li>`` elements matched on their own are ignored.
    """
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}
    wanted_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'ul', 'ol', 'li']
    for node in BeautifulSoup(html_text, 'html.parser').find_all(wanted_tags):
        tag = node.name
        if tag.startswith('h') and tag[1:].isdigit():
            doc.add_heading(node.get_text(), level=int(tag[1]))
            continue
        if tag == 'p':
            paragraph_text = node.get_text()
            if paragraph_text.strip():
                doc.add_paragraph(paragraph_text)
            continue
        style = list_styles.get(tag)
        if style is not None:
            for item in node.find_all('li', recursive=False):
                doc.add_paragraph(item.get_text(), style=style)
 # PDF'den metin çıkarma
 def extract_text_from_pdf(pdf_path):
     """Return the text of every page of the PDF at ``pdf_path``.

     Each page's text is followed by a newline.

     Raises:
         Exception: On any failure, with a message prefixed by
             "PDF okuma hatası: "; the underlying error is chained as
             ``__cause__``.
     """
     try:
         with open(pdf_path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
             # ``or ""`` keeps one page with no extractable text from aborting
             # the whole document; join avoids repeated string reallocation.
             return "".join(
                 (page.extract_text() or "") + "\n" for page in pdf_reader.pages
             )
     except Exception as e:
         raise Exception(f"PDF okuma hatası: {str(e)}") from e
-# Ana işlem fonksiyonu
def process_pdf(pdf_file, user_questions, progress=gr.Progress()):
    """Build the HTML and DOCX reports for an uploaded PDF.

    Args:
        pdf_file: Path of the uploaded PDF; a falsy value returns a prompt to
            upload a file.
        user_questions: Newline-separated questions; blank lines are ignored.
        progress: Gradio progress callback.

    Returns:
        ``(html_output, doc_path)`` on success, or ``(message, None)`` when no
        file was given or a step failed.
    """
    if not pdf_file:
        return "Lütfen bir PDF dosyası yükleyin.", None
    # gr.Progress takes a completion fraction between 0 and 1, not a percentage.
    progress(0, desc="PDF yükleniyor...")
    temp_dir = tempfile.mkdtemp()
    file_name = os.path.basename(pdf_file)
    pdf_path = os.path.join(temp_dir, file_name)
    try:
        # Work on a private copy of the upload.
        shutil.copyfile(pdf_file, pdf_path)
        progress(0.2, desc="PDF'den metin çıkarılıyor...")
        text = extract_text_from_pdf(pdf_path)
        # The extracted text is written to disk because upload_file takes a path.
        text_file_path = os.path.join(temp_dir, "extracted_text.txt")
        with open(text_file_path, "w", encoding="utf-8") as f:
            f.write(text)
        progress(0.4, desc="Metin dosyası yükleniyor...")
        text_file = upload_file(text_file_path)
        progress(0.6, desc="AI modeli hazırlanıyor...")
        chat_session, summary = build_model(text_file)
        progress(0.7, desc="Sorular işleniyor...")
        questions = [q.strip() for q in user_questions.split('\n') if q.strip()]
        progress(0.8, desc="Rapor oluşturuluyor...")
        report_text = generate_report(chat_session, questions, summary)
        progress(0.9, desc="Sonuçlar formatlanıyor...")
        html_output = convert_markdown_to_html(report_text)
        doc = Document()
        add_html_to_word(html_output, doc)
        doc_name = f"PDF_Rapor_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
        doc_path = os.path.join(temp_dir, doc_name)
        doc.save(doc_path)
        progress(1.0, desc="Tamamlandı!")
        # temp_dir is kept on success: the returned doc_path lives inside it.
        return html_output, doc_path
    except Exception as e:
        # Nothing inside temp_dir is handed back on failure, so remove it
        # instead of leaking a directory per failed request.
        shutil.rmtree(temp_dir, ignore_errors=True)
        error_message = f"<div style='color: red; font-weight: bold;'>Hata oluştu: {str(e)}</div>"
        return error_message, None
 # Varsayılan sorular
 default_questions = "\n".join(
     (
         # One question per line; the UI splits this text on newlines.
         "Belgenin ana konusu nedir?",
         "Belgenin yazarları kimlerdir?",
         "Belgedeki önemli bulgular nelerdir?",
         "Kaç sayfa ve bölüm vardır?",
         "Hangi tarihte yayınlanmıştır?",
     )
 )
-# Gradio arayüzü
-with gr.Blocks(theme=gr.themes.Soft()) as iface:
     # Page header shown above the two-column layout.
-    gr.Markdown("""
-    # 📄 PDF Soru-Cevap Asistanı
-    Bu uygulama, yüklediğiniz PDF belgesi üzerinde sorular sormanıza ve detaylı bir rapor almanıza olanak tanır.
-    """)
     # Two columns: inputs on the left (scale=1), results on the right (scale=2).
-    with gr.Row():
-        with gr.Column(scale=1):
             # Upload widget restricted to .pdf files.
-            pdf_input = gr.File(
-                label="PDF Dosyası Yükleyin",
-                file_types=[".pdf"],
-                type="filepath"
-            )
             # Question box pre-filled with default_questions, one question per line.
-            questions_input = gr.TextArea(
-                label="Sorularınız",
-                placeholder="Her satıra bir soru yazın...",
-                value=default_questions,
-                lines=10
-            )
-            submit_btn = gr.Button("📝 Rapor Oluştur", variant="primary")
-        with gr.Column(scale=2):
             # One tab for the rendered HTML report, one for the DOCX download.
-            with gr.Tabs():
-                with gr.Tab("HTML Görünüm"):
-                    html_output = gr.HTML(label="Rapor Sonucu")
-                with gr.Tab("İndirilebilir Dosya"):
-                    file_output = gr.File(label="DOCX Rapor")
     # Collapsed-by-default usage instructions.
-    with gr.Accordion("Nasıl Kullanılır?", open=False):
-        gr.Markdown("""
-        ### Kullanım Adımları:
-        1. PDF dosyanızı yükleyin
-        2. Belge hakkında cevaplarını almak istediğiniz soruları yazın
-        3. "Rapor Oluştur" düğmesine basın
-        4. Oluşturulan raporu HTML olarak görüntüleyin veya DOCX dosyası olarak indirin
-        ### İpuçları:
-        - Her satıra bir soru yazın
-        - Belgenin içeriğiyle ilgili net sorular sorun
-        - Büyük PDF'ler için işlem süresi uzayabilir
-        """)
     # Clicking the button runs process_pdf; its two return values fill the
     # HTML view and the file download, in that order.
-    submit_btn.click(
-        fn=process_pdf,
-        inputs=[pdf_input, questions_input],
-        outputs=[html_output, file_output],
-        show_progress=True
-    )
-# API anahtarını ayarla ve uygulamayı başlat
 if __name__ == "__main__":
     # Only the API-key setup is expected to raise ValueError. Launching from
     # the ``else`` branch keeps an unrelated ValueError raised while the app
     # runs from being reported as a missing API key.
     try:
         setup_api_key()
     except ValueError as e:
         print(f"Hata: {str(e)}")
         print("Lütfen GOOGLE_API_KEY çevre değişkenini ayarlayın.")
     else:
         iface.launch(share=True)

 import gradio as gr
 import google.generativeai as genai
+import os
+import PyPDF2
 import markdown
 from docx import Document
 from bs4 import BeautifulSoup
 import tempfile
 from datetime import datetime
+# API anahtarını ayarla
 def setup_api_key():
     """Configure the Gemini client from ``GOOGLE_API_KEY``.

     Returns:
         ``True`` when the variable is set to a non-empty value and the client
         was configured, ``False`` otherwise.
     """
     api_key = os.environ.get("GOOGLE_API_KEY")
     configured = bool(api_key)
     if configured:
         genai.configure(api_key=api_key)
     return configured
 # PDF'den metin çıkarma
 def extract_text_from_pdf(pdf_path):
     """Return the text of every page of the PDF at ``pdf_path``.

     Each page's text is followed by a newline.

     Returns:
         The extracted text, or, on any failure, a message starting with
         "PDF okuma hatası: " (no exception is raised).
     """
     try:
         with open(pdf_path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
             # ``or ""`` keeps one page with no extractable text from turning
             # the whole document into an error; join avoids repeated
             # string reallocation.
             return "".join(
                 (page.extract_text() or "") + "\n" for page in pdf_reader.pages
             )
     except Exception as e:
         return f"PDF okuma hatası: {str(e)}"
+# AI modelini kullanarak analiz yap
def analyze_pdf_content(text, questions, *, model=None, max_chars=15000):
    """Summarise a document and answer questions about it.

    Args:
        text: Extracted document text. Only the first ``max_chars`` characters
            are placed in each prompt.
        questions: Iterable of question strings; blank entries are skipped.
        model: Optional object with a ``generate_content(prompt)`` method whose
            result exposes ``.text``. When omitted, a ``gemini-1.5-flash``
            model is created.
        max_chars: Maximum number of characters of ``text`` sent per request.

    Returns:
        ``(summary, results)``. ``results[0]`` repeats the summary and is
        followed by one ``(question, answer)`` tuple per non-blank question.
        On any failure the return value is ``("Analiz hatası: ...", [])``.
    """
    try:
        if model is None:
            model = genai.GenerativeModel(
                model_name="gemini-1.5-flash",
                generation_config={
                    "temperature": 0.2,
                    "top_p": 0.95,
                    "top_k": 64,
                    "max_output_tokens": 8192,
                },
            )
        # Truncate once; the prompts contain only instructions and document
        # text, with no source-code comments inside the string.
        excerpt = text[:max_chars]
        summary_prompt = (
            "Aşağıdaki belge metnini analiz edip özetler misin?\n"
            "Belge:\n"
            f"{excerpt}\n"
            "Kısa bir özet ver (1-2 paragraf):\n"
        )
        summary = model.generate_content(summary_prompt).text
        # The summary stays at index 0 of the results list.
        results = [summary]
        for question in questions:
            if not question.strip():
                continue
            answer_prompt = (
                "Aşağıdaki belge metnine dayanarak soruyu cevapla:\n"
                "Belge:\n"
                f"{excerpt}\n"
                f"Soru: {question}\n"
                "Cevap:\n"
            )
            results.append((question, model.generate_content(answer_prompt).text))
        return summary, results
    except Exception as e:
        return f"Analiz hatası: {str(e)}", []
+# Markdown'ı HTML'e dönüştürme
def to_html(text):
    """Return ``text`` converted by ``markdown.markdown``."""
    rendered = markdown.markdown(text)
    return rendered
+# Word belgesi oluşturma
def create_word_document(summary, results):
    """Build the Word report and return the ``Document`` (nothing is saved here).

    Args:
        summary: Text placed under the summary heading.
        results: Iterable of ``(question, answer)`` pairs, numbered from 1.
    """
    report = Document()
    report.add_heading('PDF Belge Analiz Raporu', 0)
    created_at = datetime.now().strftime("%d.%m.%Y %H:%M")
    report.add_paragraph(f'Oluşturulma Tarihi: {created_at}')

    report.add_heading('Belge Özeti', 1)
    report.add_paragraph(summary)

    report.add_heading('Soru ve Cevaplar', 1)
    number = 0
    for question, answer in results:
        number += 1
        report.add_heading(f'Soru {number}: {question}', 2)
        report.add_paragraph(answer)
    return report
+# Ana işleme fonksiyonu
def process_pdf(pdf_file, user_questions):
    """Analyse an uploaded PDF and produce an HTML report plus a DOCX file.

    Args:
        pdf_file: Path of the uploaded PDF; a falsy value returns a prompt to
            upload a file.
        user_questions: Newline-separated questions; blank lines are ignored
            and ``None`` is treated as no questions.

    Returns:
        ``(html_output, doc_path)`` on success, or ``(message, None)`` when the
        input is missing, the API key is not configured, or a step failed.
    """
    # Local import: only this function needs HTML escaping.
    import html

    if not pdf_file:
        return "Lütfen bir PDF dosyası yükleyin.", None
    if not setup_api_key():
        return "GOOGLE_API_KEY çevre değişkeni ayarlanmamış. Lütfen API anahtarınızı ekleyin.", None
    try:
        text = extract_text_from_pdf(pdf_file)
        # extract_text_from_pdf reports failure through this message prefix.
        if text.startswith("PDF okuma hatası"):
            return html.escape(text), None
        questions = [q.strip() for q in (user_questions or "").split('\n') if q.strip()]
        summary, results = analyze_pdf_content(text, questions)
        if isinstance(summary, str) and summary.startswith("Analiz hatası"):
            return html.escape(summary), None
        # results[0] repeats the summary; the rest are (question, answer) pairs.
        qa_pairs = results[1:]
        created_at = datetime.now().strftime('%d.%m.%Y %H:%M')
        parts = [
            f"""
        <div style="font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px;">
            <h1>PDF Belge Analiz Raporu</h1>
            <p><em>Oluşturulma tarihi: {created_at}</em></p>
            <h2>Belge Özeti</h2>
            <div>{to_html(summary)}</div>
            <h2>Soru ve Cevaplar</h2>
        """
        ]
        for i, (question, answer) in enumerate(qa_pairs, 1):
            # Questions are typed by the user, so escape them before they are
            # embedded in markup.
            parts.append(
                f"""
            <div style="margin-bottom: 20px; padding: 10px; border-left: 3px solid #ccc;">
                <h3>Soru {i}: {html.escape(question)}</h3>
                <div>{to_html(answer)}</div>
            </div>
            """
            )
        parts.append("</div>")
        html_output = "".join(parts)
        doc = create_word_document(summary, qa_pairs)
        # A private directory per request: the file name only has second
        # resolution, so a shared temp dir lets concurrent requests overwrite
        # each other's report.
        temp_dir = tempfile.mkdtemp()
        doc_path = os.path.join(temp_dir, f"PDF_Rapor_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx")
        doc.save(doc_path)
        return html_output, doc_path
    except Exception as e:
        error_message = f"<div style='color: red; font-weight: bold;'>İşlem sırasında bir hata oluştu: {html.escape(str(e))}</div>"
        return error_message, None
 # Varsayılan sorular
 default_questions = "\n".join(
     (
         # One question per line; process_pdf splits this text on newlines.
         "Belgenin ana konusu nedir?",
         "Belgenin yazarları kimlerdir?",
         "Belgedeki önemli bulgular nelerdir?",
         "Kaç sayfa bulunmaktadır?",
         "Hangi tarihte yayınlanmıştır?",
     )
 )
+# Gradio arayüzü oluştur - basit Interface kullanarak
# Input widgets, in the order process_pdf receives them: the PDF, then the questions.
_report_inputs = [
    gr.File(label="PDF Dosyası Yükleyin", file_types=[".pdf"]),
    gr.Textbox(
        label="Sorularınız (Her satıra bir soru yazın)",
        value=default_questions,
        lines=10,
    ),
]
# Output widgets, matching process_pdf's two return values.
_report_outputs = [
    gr.HTML(label="Rapor Sonucu"),
    gr.File(label="Word Belgesi"),
]
demo = gr.Interface(
    fn=process_pdf,
    inputs=_report_inputs,
    outputs=_report_outputs,
    title="PDF Belgelerinden Soru-Cevap Raporu Oluşturma Aracı",
    description="PDF belgelerinizi yükleyin ve istediğiniz soruları sorun. AI destekli sistem belgenizi analiz edip yanıtları içeren bir rapor hazırlayacaktır.",
    allow_flagging="never",
)
+# Uygulamayı başlat
 if __name__ == "__main__":
     # Launch the Gradio app only when this file is executed as a script.
+    demo.launch()