Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -37,6 +37,26 @@ import asyncio
|
|
37 |
import aiohttp
|
38 |
from tqdm import tqdm
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
class URLProcessor:
|
41 |
def __init__(self):
|
42 |
self.config = Config()
|
@@ -458,7 +478,6 @@ class URLProcessor:
|
|
458 |
logger.error(f"Selenium processing failed for {url}: {e}")
|
459 |
return None
|
460 |
|
461 |
-
|
462 |
class FileProcessor:
|
463 |
"""Class to handle file processing"""
|
464 |
|
|
|
37 |
import aiohttp
|
38 |
from tqdm import tqdm
|
39 |
|
40 |
+
class Config:
    """Runtime settings, seeded from ``URLD_*`` environment variables.

    Every setting is read once, when the instance is created, and stored in
    the ``settings`` dict. A variable that is not set falls back to the
    default given in ``__init__``.

    Numeric settings are converted with ``int()`` / ``float()``, so a
    malformed value in the environment raises ``ValueError`` at construction
    time. Boolean settings are true only when the variable's value equals
    ``"true"`` (compared case-insensitively); any other value is false.
    """

    def __init__(self):
        """Populate ``self.settings`` from the environment."""
        self.settings = {
            'TIMEOUT': int(os.getenv('URLD_TIMEOUT', 10)),
            # Default limit is 2 GiB, expressed in bytes.
            'MAX_FILE_SIZE': int(os.getenv('URLD_MAX_FILE_SIZE', 2 * 1024 * 1024 * 1024)),
            'RESPECT_ROBOTS': os.getenv('URLD_RESPECT_ROBOTS', 'True').lower() == 'true',
            'USE_PROXY': os.getenv('URLD_USE_PROXY', 'False').lower() == 'true',
            'PROXY_URL': os.getenv('URLD_PROXY_URL', ''),
            'REQUEST_DELAY': float(os.getenv('URLD_REQUEST_DELAY', 1.0)),
            'MAX_RETRIES': int(os.getenv('URLD_MAX_RETRIES', 3)),
            'OUTPUT_FORMAT': os.getenv('URLD_OUTPUT_FORMAT', 'json'),
            'CHROME_DRIVER_PATH': os.getenv('URLD_CHROME_DRIVER_PATH', '/usr/local/bin/chromedriver'),
        }

    def get(self, key: str, default: Any = None) -> Any:
        """Return the setting stored under *key*.

        Args:
            key: Setting name, e.g. ``'TIMEOUT'``.
            default: Value returned when *key* is not present. Defaults to
                ``None``, so ``get(key)`` behaves as a plain ``dict.get``.

        Returns:
            The stored value, or *default* when the key is unknown.
        """
        return self.settings.get(key, default)

    def update(self, settings: Dict[str, Any]) -> None:
        """Merge *settings* into the current configuration.

        Existing keys are overwritten and new keys are added; values are
        stored as given, without validation or type conversion.

        Args:
            settings: Mapping of setting names to their new values.
        """
        self.settings.update(settings)
|
59 |
+
|
60 |
class URLProcessor:
|
61 |
def __init__(self):
|
62 |
self.config = Config()
|
|
|
478 |
logger.error(f"Selenium processing failed for {url}: {e}")
|
479 |
return None
|
480 |
|
|
|
481 |
class FileProcessor:
|
482 |
"""Class to handle file processing"""
|
483 |
|