Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import mimetypes
|
|
7 |
import zipfile
|
8 |
import tempfile
|
9 |
from datetime import datetime
|
10 |
-
from typing import List, Dict, Optional, Union
|
11 |
from pathlib import Path
|
12 |
import requests
|
13 |
import validators
|
@@ -96,8 +96,7 @@ class URLProcessor:
|
|
96 |
if not file_id:
|
97 |
logger.error(f"Invalid Google Drive URL: {url}")
|
98 |
return None
|
99 |
-
|
100 |
-
direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
|
101 |
response = self.session.get(direct_url, timeout=self.timeout)
|
102 |
response.raise_for_status()
|
103 |
|
@@ -222,7 +221,7 @@ class FileProcessor:
|
|
222 |
content = f.read()
|
223 |
|
224 |
return [{
|
225 |
-
'source': 'file',
|
226 |
'filename': os.path.basename(file_path),
|
227 |
'file_size': file_stat.st_size,
|
228 |
'mime_type': mimetypes.guess_type(file_path)[0],
|
@@ -329,7 +328,7 @@ def create_interface():
|
|
329 |
|
330 |
# Process URLs
|
331 |
if urls:
|
332 |
-
url_list = re.split(r
|
333 |
url_list = [url.strip() for url in url_list if url.strip()]
|
334 |
|
335 |
for url in url_list:
|
@@ -338,7 +337,7 @@ def create_interface():
|
|
338 |
content = processor.fetch_content(url)
|
339 |
if content:
|
340 |
results.append({
|
341 |
-
'source': 'url',
|
342 |
'url': url,
|
343 |
'content': content,
|
344 |
'timestamp': datetime.now().isoformat()
|
|
|
7 |
import zipfile
|
8 |
import tempfile
|
9 |
from datetime import datetime
|
10 |
+
from typing import List, Dict, Optional, Union
|
11 |
from pathlib import Path
|
12 |
import requests
|
13 |
import validators
|
|
|
96 |
if not file_id:
|
97 |
logger.error(f"Invalid Google Drive URL: {url}")
|
98 |
return None
|
99 |
+
direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
|
|
|
100 |
response = self.session.get(direct_url, timeout=self.timeout)
|
101 |
response.raise_for_status()
|
102 |
|
|
|
221 |
content = f.read()
|
222 |
|
223 |
return [{
|
224 |
+
' source': 'file',
|
225 |
'filename': os.path.basename(file_path),
|
226 |
'file_size': file_stat.st_size,
|
227 |
'mime_type': mimetypes.guess_type(file_path)[0],
|
|
|
328 |
|
329 |
# Process URLs
|
330 |
if urls:
|
331 |
+
url_list = re.split(r'[,\n]', urls)
|
332 |
url_list = [url.strip() for url in url_list if url.strip()]
|
333 |
|
334 |
for url in url_list:
|
|
|
337 |
content = processor.fetch_content(url)
|
338 |
if content:
|
339 |
results.append({
|
340 |
+
'source': ' url',
|
341 |
'url': url,
|
342 |
'content': content,
|
343 |
'timestamp': datetime.now().isoformat()
|