acecalisto3 committed on
Commit
e784699
·
verified ·
1 Parent(s): 92fa27c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -131,6 +131,7 @@ class URLProcessor:
131
  logger.error(f"Calendar fetch failed: {e}")
132
  return None
133
 
 
134
  def _fetch_html_content(self, url: str) -> Optional[Dict]:
135
  """Standard HTML content processing"""
136
  try:
@@ -146,6 +147,15 @@ class URLProcessor:
146
  # Extract main content
147
  main_content = soup.find('main') or soup.find('article') or soup.body
148
 
 
 
 
 
 
 
 
 
 
149
  # Clean and structure content
150
  text_content = main_content.get_text(separator='\n', strip=True)
151
  cleaned_content = self.advanced_text_cleaning(text_content)
 
131
  logger.error(f"Calendar fetch failed: {e}")
132
  return None
133
 
134
+
135
  def _fetch_html_content(self, url: str) -> Optional[Dict]:
136
  """Standard HTML content processing"""
137
  try:
 
147
  # Extract main content
148
  main_content = soup.find('main') or soup.find('article') or soup.body
149
 
150
+ # Check if main_content is None
151
+ if main_content is None:
152
+ logger.warning(f"No main content found in the HTML for URL: {url}")
153
+ return {
154
+ 'content': "No main content found.",
155
+ 'content_type': response.headers.get('Content-Type', ''),
156
+ 'timestamp': datetime.now().isoformat()
157
+ }
158
+
159
  # Clean and structure content
160
  text_content = main_content.get_text(separator='\n', strip=True)
161
  cleaned_content = self.advanced_text_cleaning(text_content)