openfree committed on
Commit
b25907a
·
verified ·
1 Parent(s): 3233647

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -204,24 +204,31 @@ class UnifiedAudioConverter:
204
  def extract_text_from_pdf(self, pdf_file) -> str:
205
  """Extract text content from PDF file"""
206
  try:
207
- # 임시 파일로 저장
208
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
209
- tmp_file.write(pdf_file.read())
210
- tmp_path = tmp_file.name
 
 
 
 
211
 
212
  # PDF 로드 및 텍스트 추출
213
- loader = PyPDFLoader(tmp_path)
214
  pages = loader.load()
215
 
216
  # 모든 페이지의 텍스트를 결합
217
  text = "\n".join([page.page_content for page in pages])
218
 
219
- # 임시 파일 삭제
220
- os.unlink(tmp_path)
 
221
 
222
  return text
223
  except Exception as e:
224
  raise RuntimeError(f"Failed to extract text from PDF: {e}")
 
 
225
 
226
  def _get_messages_formatter_type(self, model_name):
227
  """Get appropriate message formatter for the model"""
@@ -680,6 +687,7 @@ async def synthesize(article_input, input_type: str = "URL", mode: str = "Local"
680
  else: # PDF
681
  if not article_input:
682
  return "Please upload a PDF file.", None
 
683
  text = converter.extract_text_from_pdf(article_input)
684
 
685
  # Limit text to max words
 
204
  def extract_text_from_pdf(self, pdf_file) -> str:
205
  """Extract text content from PDF file"""
206
  try:
207
+ # Gradio returns file path, not file object
208
+ if isinstance(pdf_file, str):
209
+ pdf_path = pdf_file
210
+ else:
211
+ # If it's a file object (shouldn't happen with Gradio)
212
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
213
+ tmp_file.write(pdf_file.read())
214
+ pdf_path = tmp_file.name
215
 
216
  # PDF 로드 및 텍스트 추출
217
+ loader = PyPDFLoader(pdf_path)
218
  pages = loader.load()
219
 
220
  # 모든 페이지의 텍스트를 결합
221
  text = "\n".join([page.page_content for page in pages])
222
 
223
+ # 임시 파일인 경우 삭제
224
+ if not isinstance(pdf_file, str) and os.path.exists(pdf_path):
225
+ os.unlink(pdf_path)
226
 
227
  return text
228
  except Exception as e:
229
  raise RuntimeError(f"Failed to extract text from PDF: {e}")
230
+
231
+
232
 
233
  def _get_messages_formatter_type(self, model_name):
234
  """Get appropriate message formatter for the model"""
 
687
  else: # PDF
688
  if not article_input:
689
  return "Please upload a PDF file.", None
690
+ # Gradio returns the file path as a string
691
  text = converter.extract_text_from_pdf(article_input)
692
 
693
  # Limit text to max words