Spaces:

acecalisto3
/

urld

Running

acecalisto3 committed on Mar 26

Commit

e784699

verified ·

1 Parent(s): 92fa27c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -131,6 +131,7 @@ class URLProcessor:
             logger.error(f"Calendar fetch failed: {e}")
             return None
     def _fetch_html_content(self, url: str) -> Optional[Dict]:
         """Standard HTML content processing"""
         try:
@@ -146,6 +147,15 @@ class URLProcessor:
             # Extract main content
             main_content = soup.find('main') or soup.find('article') or soup.body
             # Clean and structure content
             text_content = main_content.get_text(separator='\n', strip=True)
             cleaned_content = self.advanced_text_cleaning(text_content)

             logger.error(f"Calendar fetch failed: {e}")
             return None
     def _fetch_html_content(self, url: str) -> Optional[Dict]:
         """Standard HTML content processing"""
         try:
             # Extract main content
             main_content = soup.find('main') or soup.find('article') or soup.body
+            # Check if main_content is None
+            if main_content is None:
+                logger.warning(f"No main content found in the HTML for URL: {url}")
+                return {
+                    'content': "No main content found.",
+                    'content_type': response.headers.get('Content-Type', ''),
+                    'timestamp': datetime.now().isoformat()
+                }
             # Clean and structure content
             text_content = main_content.get_text(separator='\n', strip=True)
             cleaned_content = self.advanced_text_cleaning(text_content)