yunuseduran committed on
Commit
f80bc8c
·
verified ·
1 Parent(s): 33774ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -7
app.py CHANGED
@@ -4,8 +4,8 @@ import markdown
4
  from docx import Document
5
  from bs4 import BeautifulSoup
6
  import shutil
7
- import subprocess
8
  import os
 
9
 
10
  # Setup your API key
11
  def setup_api_key():
@@ -86,15 +86,27 @@ def add_html_to_word(html_text, doc):
86
  elif element.name:
87
  doc.add_paragraph(element.get_text()) # For any other tags
88
 
 
 
 
 
 
 
 
 
 
89
  def process_pdf(pdf_file, user_questions):
90
  file_name = pdf_file.split('/')[-1]
91
  saved_file_path = f"/tmp/{file_name}"
92
  shutil.copyfile(pdf_file, saved_file_path)
93
-
94
- subprocess.run(["apt-get", "update"])
95
- subprocess.run(["apt-get", "install", "-y", "poppler-utils"])
96
- subprocess.run(["pdftotext", saved_file_path, "/tmp/text_file.txt"])
97
-
 
 
 
98
  text_file = upload_file("/tmp/text_file.txt")
99
  chat_session = build_model(text_file)
100
 
@@ -135,4 +147,5 @@ iface = gr.Interface(
135
  )
136
 
137
  setup_api_key()
138
- iface.launch(share=True)
 
 
4
  from docx import Document
5
  from bs4 import BeautifulSoup
6
  import shutil
 
7
  import os
8
+ import PyPDF2 # PDF işleme için subprocess yerine Python kütüphanesi kullanın
9
 
10
  # Setup your API key
11
  def setup_api_key():
 
86
  elif element.name:
87
  doc.add_paragraph(element.get_text()) # For any other tags
88
 
89
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file using PyPDF2.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        A single string holding the text of each page in order, with a
        newline appended after every page. Empty string for a PDF with
        no pages.
    """
    with open(pdf_path, "rb") as pdf_stream:
        reader = PyPDF2.PdfReader(pdf_stream)
        # Iterate the pages directly and join once instead of indexing
        # and growing a string with repeated "+=".
        # `or ""` guards against a page yielding no text (a None/empty
        # result, e.g. image-only pages) so the concatenation cannot
        # raise TypeError.
        return "".join(
            (page.extract_text() or "") + "\n" for page in reader.pages
        )
97
+
98
  def process_pdf(pdf_file, user_questions):
99
  file_name = pdf_file.split('/')[-1]
100
  saved_file_path = f"/tmp/{file_name}"
101
  shutil.copyfile(pdf_file, saved_file_path)
102
+
103
+ # PDF'den doğrudan metin çıkar
104
+ text = extract_text_from_pdf(saved_file_path)
105
+
106
+ # Çıkarılan metni bir dosyaya yaz
107
+ with open("/tmp/text_file.txt", "w", encoding="utf-8") as f:
108
+ f.write(text)
109
+
110
  text_file = upload_file("/tmp/text_file.txt")
111
  chat_session = build_model(text_file)
112
 
 
147
  )
148
 
149
  setup_api_key()
150
+ # Hugging Face Spaces için önerilen launch konfigürasyonu
151
+ iface.launch(server_name="0.0.0.0", server_port=7860)