Update app.py
Browse files
app.py
CHANGED
@@ -13,19 +13,30 @@ from cachetools import TTLCache
|
|
13 |
cache = TTLCache(maxsize=1000, ttl=3600)
|
14 |
|
15 |
async def fetch_url(url, session, max_retries=3, timeout=180):
|
|
|
16 |
for attempt in range(max_retries):
|
17 |
try:
|
18 |
async with session.get(url, timeout=ClientTimeout(total=timeout)) as response:
|
19 |
response.raise_for_status()
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
except asyncio.TimeoutError:
|
23 |
print(f"Attempt {attempt + 1} timed out after {timeout} seconds", flush=True)
|
24 |
-
except aiohttp.ClientPayloadError as e:
|
25 |
-
print(f"Payload error on attempt {attempt + 1}: {str(e)}", flush=True)
|
26 |
-
if response.content_length and len(content) >= response.content_length:
|
27 |
-
print(f"Received data length: {len(content)}, expected: {response.content_length}", flush=True)
|
28 |
-
return content.decode('utf-8', errors='ignore')
|
29 |
except aiohttp.ClientError as e:
|
30 |
print(f"Attempt {attempt + 1} failed: {str(e)}", flush=True)
|
31 |
|
|
|
13 |
cache = TTLCache(maxsize=1000, ttl=3600)
|
14 |
|
15 |
async def fetch_url(url, session, max_retries=3, timeout=180):
|
16 |
+
total_content = b""
|
17 |
for attempt in range(max_retries):
|
18 |
try:
|
19 |
async with session.get(url, timeout=ClientTimeout(total=timeout)) as response:
|
20 |
response.raise_for_status()
|
21 |
+
while True:
|
22 |
+
chunk = await response.content.read(8192) # 每次读取8KB
|
23 |
+
if not chunk:
|
24 |
+
break
|
25 |
+
total_content += chunk
|
26 |
+
|
27 |
+
if response.content_length is not None and len(total_content) < response.content_length:
|
28 |
+
print(f"Warning: Received content length ({len(total_content)}) is less than expected ({response.content_length})")
|
29 |
+
if attempt == max_retries - 1:
|
30 |
+
print("This was the last attempt. Returning partial content.")
|
31 |
+
return total_content.decode('utf-8', errors='ignore')
|
32 |
+
else:
|
33 |
+
print("Retrying...")
|
34 |
+
await asyncio.sleep(5)
|
35 |
+
continue
|
36 |
+
|
37 |
+
return total_content.decode('utf-8', errors='ignore')
|
38 |
except asyncio.TimeoutError:
|
39 |
print(f"Attempt {attempt + 1} timed out after {timeout} seconds", flush=True)
|
|
|
|
|
|
|
|
|
|
|
40 |
except aiohttp.ClientError as e:
|
41 |
print(f"Attempt {attempt + 1} failed: {str(e)}", flush=True)
|
42 |
|