wt002 committed on
Commit
3102ee4
·
verified ·
1 Parent(s): aa1f478

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -73,28 +73,26 @@ class BasicAgent:
73
  return page.summary if page.exists() else "No Wikipedia page found"
74
 
75
  def process_document(self, file_path: str) -> str:
76
- """Extract text from PDF, Word, CSV, Excel"""
77
  if not os.path.exists(file_path):
78
  return "File not found"
 
 
 
 
 
 
 
 
79
 
80
- ext = os.path.splitext(file_path)[1].lower()
81
-
82
- try:
83
- if ext == '.pdf':
84
  with open(file_path, 'rb') as f:
85
- reader = PyPDF.PdfReader(f)
86
- return "\n".join([page.extract_text() for page in reader.pages])
87
- elif ext in ('.doc', '.docx'):
88
- doc = Document(file_path)
89
- return "\n".join([para.text for para in doc.paragraphs])
90
- elif ext == '.csv':
91
- return pd.read_csv(file_path).to_string()
92
- elif ext in ('.xls', '.xlsx'):
93
- return pd.read_excel(file_path).to_string()
94
- else:
95
- return "Unsupported file format"
96
- except Exception as e:
97
- return f"Error processing document: {str(e)}"
98
 
99
  def __call__(self, query: str) -> str:
100
  """Handle queries (text, search, or file processing)"""
 
73
  return page.summary if page.exists() else "No Wikipedia page found"
74
 
75
  def process_document(self, file_path: str) -> str:
76
+ """Extract text from PDF (works with PyPDF2 or pypdf)"""
77
  if not os.path.exists(file_path):
78
  return "File not found"
79
+
80
+ if file_path.lower().endswith('.pdf'):
81
+ try:
82
+ # Try modern pypdf first
83
+ from pypdf import PdfReader
84
+ except ImportError:
85
+ # Fallback to PyPDF2
86
+ from PyPDF2 import PdfReader
87
 
88
+ try:
 
 
 
89
  with open(file_path, 'rb') as f:
90
+ reader = PdfReader(f)
91
+ text = "\n".join([page.extract_text() for page in reader.pages])
92
+ return text if text.strip() else "PDF has no extractable text"
93
+ except Exception as e:
94
+ return f"PDF processing error: {str(e)}"
95
+
 
 
 
 
 
 
 
96
 
97
  def __call__(self, query: str) -> str:
98
  """Handle queries (text, search, or file processing)"""