acecalisto3 committed on
Commit
3707133
·
verified ·
1 Parent(s): 352fbac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -31,17 +31,23 @@ logging.basicConfig(
31
  logger = logging.getLogger(__name__)
32
 
33
  class URLProcessor:
34
- def __init__(self, timeout=15, max_retries=3, concurrent_requests=5, cache_dir='cache'):
 
 
 
 
 
 
 
35
  self.cache_dir = Path(cache_dir)
36
  self.cache_dir.mkdir(exist_ok=True)
37
-
38
- # Persistent disk-based caches
39
  self.url_cache = Cache(str(self.cache_dir / 'url_cache'))
40
  self.content_cache = Cache(str(self.cache_dir / 'content_cache'), size_limit=2**30)
41
-
 
42
  self.session = requests.Session()
43
  self.session.headers.update({
44
- 'User -Agent': self.ua.random,
45
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
46
  'Accept-Language': 'en-US,en;q=0.5',
47
  'Connection': 'keep-alive'
 
31
  logger = logging.getLogger(__name__)
32
 
33
  class URLProcessor:
34
+ """Class to handle URL processing with advanced features"""
35
+
36
+ def __init__(self, timeout=15, max_retries=3, cache_dir='cache'):
37
+ self.ua = UserAgent() # Initialize UserAgent first
38
+ self.timeout = timeout
39
+ self.max_retries = max_retries
40
+
41
+ # Persistent caching setup
42
  self.cache_dir = Path(cache_dir)
43
  self.cache_dir.mkdir(exist_ok=True)
 
 
44
  self.url_cache = Cache(str(self.cache_dir / 'url_cache'))
45
  self.content_cache = Cache(str(self.cache_dir / 'content_cache'), size_limit=2**30)
46
+
47
+ # Session configuration
48
  self.session = requests.Session()
49
  self.session.headers.update({
50
+ 'User-Agent': self.ua.random, # Correct header key
51
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
52
  'Accept-Language': 'en-US,en;q=0.5',
53
  'Connection': 'keep-alive'