Spaces:

acecalisto3
/

urld

Running

acecalisto3 committed on Mar 12

Commit

eed2b1e

verified ·

1 Parent(s): 83a6fef

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -100,15 +100,34 @@ class URLProcessor:
     @sleep_and_retry
     @limits(calls=20, period=60)  # Refined rate limiting
     def fetch_content(self, url: str) -> Optional[Dict]:
         cached = self.content_cache.get(url)
         if cached:
             return cached
         try:
             response = self.session.get(url, timeout=self.timeout)
             soup = BeautifulSoup(response.text, 'html.parser')
-            # Extract structured elements
             title = soup.title.text.strip() if soup.title else ''
             meta_desc = soup.find('meta', {'name': 'description'})
             description = meta_desc['content'].strip() if meta_desc else ''

     @sleep_and_retry
     @limits(calls=20, period=60)  # Refined rate limiting
     def fetch_content(self, url: str) -> Optional[Dict]:
+        """Fetch and structure content from URL, handling Google Drive and Calendar links."""
+        if 'drive.google.com' in url:
+            # Convert Google Drive URL to direct download link
+            file_id = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
+            if file_id:
+                url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
+            else:
+                logger.error(f"Invalid Google Drive URL: {url}")
+                return None
         cached = self.content_cache.get(url)
         if cached:
             return cached
         try:
             response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+            # Handle ICS files (Google Calendar)
+            if 'text/calendar' in response.headers.get('Content-Type', ''):
+                return {
+                    'content': response.text,
+                    'content_type': 'text/calendar',
+                    'timestamp': datetime.now().isoformat()
+                }
+            # Handle HTML content
             soup = BeautifulSoup(response.text, 'html.parser')
             title = soup.title.text.strip() if soup.title else ''
             meta_desc = soup.find('meta', {'name': 'description'})
             description = meta_desc['content'].strip() if meta_desc else ''