Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -204,24 +204,31 @@ class UnifiedAudioConverter:
|
|
204 |
def extract_text_from_pdf(self, pdf_file) -> str:
|
205 |
"""Extract text content from PDF file"""
|
206 |
try:
|
207 |
-
#
|
208 |
-
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
211 |
|
212 |
# PDF 로드 및 텍스트 추출 (load the PDF and extract its text)
|
213 |
-
loader = PyPDFLoader(
|
214 |
pages = loader.load()
|
215 |
|
216 |
# 모든 페이지의 텍스트를 결합 (join the text of all pages)
|
217 |
text = "\n".join([page.page_content for page in pages])
|
218 |
|
219 |
-
# ์์
|
220 |
-
os.
|
|
|
221 |
|
222 |
return text
|
223 |
except Exception as e:
|
224 |
raise RuntimeError(f"Failed to extract text from PDF: {e}")
|
|
|
|
|
225 |
|
226 |
def _get_messages_formatter_type(self, model_name):
|
227 |
"""Get appropriate message formatter for the model"""
|
@@ -680,6 +687,7 @@ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"
|
|
680 |
else: # PDF
|
681 |
if not article_input:
|
682 |
return "Please upload a PDF file.", None
|
|
|
683 |
text = converter.extract_text_from_pdf(article_input)
|
684 |
|
685 |
# Limit text to max words
|
|
|
204 |
def extract_text_from_pdf(self, pdf_file) -> str:
    """Extract the text content of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``), which
            is what Gradio passes for an uploaded file, or a binary file-like
            object exposing ``read()``.

    Returns:
        The ``page_content`` of every page, joined with newlines.

    Raises:
        RuntimeError: If the PDF cannot be read or parsed. The original
            exception is attached as ``__cause__``.
    """
    # Path of the temporary copy we create ourselves (file-object input only).
    # Tracked separately so that a caller-supplied path is never deleted.
    tmp_path = None
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            # Gradio returns a file path, not a file object.
            pdf_path = os.fspath(pdf_file)
        else:
            # File-like object: PyPDFLoader needs a path, so spool the bytes
            # to a temporary file first.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                # Record the name before writing so cleanup still happens
                # if read()/write() fails half-way.
                tmp_path = tmp_file.name
                tmp_file.write(pdf_file.read())
            pdf_path = tmp_path

        # Load the PDF and extract the text of each page.
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()

        # Combine the text of all pages.
        return "\n".join(page.page_content for page in pages)
    except Exception as e:
        raise RuntimeError(f"Failed to extract text from PDF: {e}") from e
    finally:
        # Remove the temporary copy on success AND on failure; the previous
        # implementation leaked it whenever loading raised.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: never mask the real error (or the
                # result) because the temp file is already gone/locked.
                pass
|
230 |
+
|
231 |
+
|
232 |
|
233 |
def _get_messages_formatter_type(self, model_name):
|
234 |
"""Get appropriate message formatter for the model"""
|
|
|
687 |
else: # PDF
|
688 |
if not article_input:
|
689 |
return "Please upload a PDF file.", None
|
690 |
+
# Gradio returns the file path as a string
|
691 |
text = converter.extract_text_from_pdf(article_input)
|
692 |
|
693 |
# Limit text to max words
|