sfun committed on
Commit
8788463
·
verified ·
1 Parent(s): 08f92de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -13,19 +13,30 @@ from cachetools import TTLCache
13
  cache = TTLCache(maxsize=1000, ttl=3600)
14
 
15
  async def fetch_url(url, session, max_retries=3, timeout=180):
 
16
  for attempt in range(max_retries):
17
  try:
18
  async with session.get(url, timeout=ClientTimeout(total=timeout)) as response:
19
  response.raise_for_status()
20
- content = await response.read()
21
- return content.decode('utf-8', errors='ignore')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  except asyncio.TimeoutError:
23
  print(f"Attempt {attempt + 1} timed out after {timeout} seconds", flush=True)
24
- except aiohttp.ClientPayloadError as e:
25
- print(f"Payload error on attempt {attempt + 1}: {str(e)}", flush=True)
26
- if response.content_length and len(content) >= response.content_length:
27
- print(f"Received data length: {len(content)}, expected: {response.content_length}", flush=True)
28
- return content.decode('utf-8', errors='ignore')
29
  except aiohttp.ClientError as e:
30
  print(f"Attempt {attempt + 1} failed: {str(e)}", flush=True)
31
 
 
13
  cache = TTLCache(maxsize=1000, ttl=3600)
14
 
15
  async def fetch_url(url, session, max_retries=3, timeout=180):
16
+ total_content = b""
17
  for attempt in range(max_retries):
18
  try:
19
  async with session.get(url, timeout=ClientTimeout(total=timeout)) as response:
20
  response.raise_for_status()
21
+ while True:
22
+ chunk = await response.content.read(8192) # 每次读取8KB
23
+ if not chunk:
24
+ break
25
+ total_content += chunk
26
+
27
+ if response.content_length is not None and len(total_content) < response.content_length:
28
+ print(f"Warning: Received content length ({len(total_content)}) is less than expected ({response.content_length})")
29
+ if attempt == max_retries - 1:
30
+ print("This was the last attempt. Returning partial content.")
31
+ return total_content.decode('utf-8', errors='ignore')
32
+ else:
33
+ print("Retrying...")
34
+ await asyncio.sleep(5)
35
+ continue
36
+
37
+ return total_content.decode('utf-8', errors='ignore')
38
  except asyncio.TimeoutError:
39
  print(f"Attempt {attempt + 1} timed out after {timeout} seconds", flush=True)
 
 
 
 
 
40
  except aiohttp.ClientError as e:
41
  print(f"Attempt {attempt + 1} failed: {str(e)}", flush=True)
42