Final_Assignment_Project

Sleeping

wt002 committed on May 6

Commit

3102ee4

verified ·

1 Parent(s): aa1f478

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -73,28 +73,26 @@ class BasicAgent:
         return page.summary if page.exists() else "No Wikipedia page found"
     def process_document(self, file_path: str) -> str:
-        """Extract text from PDF, Word, CSV, Excel"""
         if not os.path.exists(file_path):
             return "File not found"
-        ext = os.path.splitext(file_path)[1].lower()
-        try:
-            if ext == '.pdf':
                 with open(file_path, 'rb') as f:
-                    reader = PyPDF.PdfReader(f)
-                    return "\n".join([page.extract_text() for page in reader.pages])
-            elif ext in ('.doc', '.docx'):
-                doc = Document(file_path)
-                return "\n".join([para.text for para in doc.paragraphs])
-            elif ext == '.csv':
-                return pd.read_csv(file_path).to_string()
-            elif ext in ('.xls', '.xlsx'):
-                return pd.read_excel(file_path).to_string()
-            else:
-                return "Unsupported file format"
-        except Exception as e:
-            return f"Error processing document: {str(e)}"
     def __call__(self, query: str) -> str:
         """Handle queries (text, search, or file processing)"""

         return page.summary if page.exists() else "No Wikipedia page found"
     def process_document(self, file_path: str) -> str:
+        """Extract text from PDF (works with PyPDF2 or pypdf)"""
         if not os.path.exists(file_path):
             return "File not found"
+        if file_path.lower().endswith('.pdf'):
+            try:
+                # Try modern pypdf first
+                from pypdf import PdfReader
+            except ImportError:
+                # Fallback to PyPDF2
+                from PyPDF2 import PdfReader
+            try:
                 with open(file_path, 'rb') as f:
+                    reader = PdfReader(f)
+                    text = "\n".join([page.extract_text() for page in reader.pages])
+                    return text if text.strip() else "PDF has no extractable text"
+            except Exception as e:
+                return f"PDF processing error: {str(e)}"
     def __call__(self, query: str) -> str:
         """Handle queries (text, search, or file processing)"""