File size: 5,048 Bytes
78fae79
 
 
 
 
 
 
f80bc8c
78fae79
d45ca00
78fae79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f80bc8c
 
 
 
 
 
 
 
 
9ff0896
 
 
 
f80bc8c
 
 
 
 
 
 
 
9ff0896
 
78fae79
9ff0896
 
78fae79
 
9ff0896
78fae79
 
9ff0896
 
78fae79
 
 
 
 
d0adb44
 
 
 
78fae79
 
 
 
 
9ff0896
78fae79
9ff0896
78fae79
 
 
 
e928691
78fae79
83e3ece
d0adb44
78fae79
 
 
f80bc8c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import shutil
import tempfile

import PyPDF2  # Use a Python library instead of a subprocess for PDF processing
import google.generativeai as genai
import gradio as gr
import markdown
from bs4 import BeautifulSoup
from docx import Document

# Setup your API key
def setup_api_key():
    """Configure the Gemini client from the ``GOOGLE_API_KEY`` environment variable.

    The value is passed through as-is; if the variable is unset, ``None`` is
    handed to ``genai.configure``.
    """
    genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

def upload_file(file_path):
    """Upload the file at *file_path* via ``genai.upload_file`` and return the result.

    Progress is reported on stdout before and after the upload; the second
    message includes the uploaded file's ``uri``.
    """
    print("Uploading file...")
    uploaded = genai.upload_file(path=file_path)
    print("Completed upload: {}".format(uploaded.uri))
    return uploaded

def to_markdown(text):
    """Render *text* with ``markdown.markdown`` after normalising bullets.

    Every '•' character is replaced by '  *' (an indented Markdown list
    marker) before rendering.
    """
    return markdown.markdown(text.replace('•', '  *'))

def build_model(text_file, model_name="gemini-1.5-flash"):
    """Create a Gemini chat session primed with the uploaded document.

    Args:
        text_file: The uploaded-file handle returned by ``upload_file``.
        model_name: Name of the Gemini model to use. Defaults to the model
            this app has always used.

    Returns:
        A chat session whose history already contains the document, so
        follow-up questions can be sent as plain prompts.
    """
    generation_config = {
        "temperature": 0.2,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }

    model = genai.GenerativeModel(
        model_name=model_name,
        generation_config=generation_config,
        system_instruction="""Answer the questions based on the uploaded file.
        If there is no related info in the file just reply 'I don't know.' """,
    )

    chat_session = model.start_chat(history=[])

    # The first message attaches the file to the conversation. Its reply is
    # intentionally discarded: the call exists only to put the document into
    # the session history.
    chat_session.send_message(["Summarize the doc in one sentence", text_file])
    return chat_session

def chat(chat_session, prompt):
    """Send *prompt* through *chat_session* and return the reply's ``text``."""
    return chat_session.send_message(prompt).text

def generate_report(chat_session, questions):
    """Ask each question in the chat session and build a Markdown report.

    Args:
        chat_session: Chat session passed on to ``chat`` for every question.
        questions: Iterable of question strings. Surrounding whitespace is
            stripped, and entries that are empty after stripping are skipped.

    Returns:
        Markdown text: a "QUESTIONS & ANSWERS" heading followed by one
        ``##`` heading per question with its answer underneath.
    """
    sections = ["\n## QUESTIONS & ANSWERS\n"]
    for raw_question in questions:
        question = raw_question.strip()
        if not question:
            # Blank lines in the user's input are not questions; sending an
            # empty prompt would only produce an empty heading and a wasted
            # (or failing) model call.
            continue
        answer = chat(chat_session, question)
        sections.append(f"\n## {question}\n")
        sections.append(f"\n{answer}\n")
    return "".join(sections)

def convert_markdown_to_html(report_text):
    """Return *report_text* rendered by ``markdown.markdown``."""
    return markdown.markdown(report_text)

def add_html_to_word(html_text, doc):
    """Append the top-level elements of *html_text* to the Word document *doc*.

    Mapping of tags to document content:
      * ``h1``..``h6`` -> heading of the matching level
      * ``ul`` / ``ol`` -> one 'List Bullet' / 'List Number' paragraph per
        ``li`` found inside the list
      * any other named tag (including ``p``) -> plain paragraph
    Nodes without a tag name are ignored.
    """
    heading_levels = {f'h{level}': level for level in range(1, 7)}
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    for node in BeautifulSoup(html_text, 'html.parser'):
        tag = node.name
        if not tag:
            # No tag name (e.g. text between elements): nothing to add.
            continue
        if tag in heading_levels:
            doc.add_heading(node.get_text(), level=heading_levels[tag])
        elif tag in list_styles:
            for item in node.find_all('li'):
                doc.add_paragraph(item.get_text(), style=list_styles[tag])
        else:
            doc.add_paragraph(node.get_text())

def extract_text_from_pdf(pdf_path):
    """Extract the text of a PDF file using PyPDF2.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages in order, each page followed by a newline.
        A PDF with no pages yields an empty string.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Pages must be read while the file is still open. ``or ""`` is a
        # defensive guard in case extraction yields None for a page without
        # extractable text, which would otherwise break the concatenation.
        page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    return "".join(page_texts)

def process_pdf(pdf_file, user_questions):
    """Answer questions about an uploaded PDF and produce an HTML/DOCX report.

    Args:
        pdf_file: Filesystem path of the uploaded PDF.
        user_questions: The questions as one string, one question per line.
            Blank lines are ignored.

    Returns:
        A ``(html_text, docx_path)`` tuple: the report rendered as HTML and
        the path of the saved ``Report_<pdf name>.docx`` file.

    Raises:
        gr.Error: If no PDF was provided.
    """
    if not pdf_file:
        raise gr.Error("Please upload a PDF file.")

    file_name = os.path.basename(pdf_file)

    # Extract the text straight from the uploaded file; copying it to a
    # fixed /tmp/<name> first added nothing and could clash between uploads.
    text = extract_text_from_pdf(pdf_file)

    # Write the extracted text to a per-request temporary file so that
    # concurrent requests cannot overwrite each other's upload payload.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".txt", encoding="utf-8", delete=False
    ) as tmp:
        tmp.write(text)
        text_path = tmp.name
    try:
        text_file = upload_file(text_path)
    finally:
        os.remove(text_path)

    chat_session = build_model(text_file)

    questions = [
        line.strip() for line in user_questions.splitlines() if line.strip()
    ]
    report_text = generate_report(chat_session, questions)

    doc = Document()
    html_text = convert_markdown_to_html(report_text)
    add_html_to_word(html_text, doc)

    # splitext swaps only the real extension, whatever its letter case.
    stem, _ = os.path.splitext(file_name)
    doc_path = os.path.join(tempfile.gettempdir(), f"Report_{stem}.docx")
    doc.save(doc_path)

    return html_text, doc_path

# Default questions pre-filled in the questions text area (one per line).
questions = [
    "Makalenin yazarları kimlerdir?",
    "Hangi modeller kullanılmıştır?",
    "Kaç referans vardır?",
    "Hangi yılda yayınlanmıştır?",
]
questions_str = "\n".join(questions)

# Input components: the PDF (delivered to process_pdf as a file path) and
# the questions text.
pdf_input = gr.File(label="Upload PDF", type="filepath")
questions_input = gr.TextArea(
    label="Enter Questions",
    placeholder="Type your questions here, one per line.",
    value=questions_str,
)

# Output components: the rendered report and the generated DOCX file.
report_html = gr.HTML(label="HTML Formatted Report")
report_docx = gr.File(label="DOCX File Output", type="binary")

iface = gr.Interface(
    fn=process_pdf,
    inputs=[pdf_input, questions_input],
    outputs=[report_html, report_docx],
    title="Pdflerinizden kısa rapor oluşturma arac @YED",
    description="Sorularınızı sormak ve cevap almak için PDF'inizi yükleyin.",
)

setup_api_key()
# Launch configuration recommended for Hugging Face Spaces
iface.launch(server_name="0.0.0.0", server_port=7860)