Spaces:
Sleeping
Sleeping
File size: 2,369 Bytes
172e21d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import os
import requests
from application.utils.logger import get_logger
from langchain_core.tools import tool
# Module-level logger shared by the tool below, obtained from the project's get_logger helper.
logger = get_logger()
@tool
def download_pdf(filename: str, url: str, save_path: str = "reports", overwrite: bool = False):
    """
    Downloads a PDF file from a given URL ('url') and saves it locally
    with the specified 'filename'. Returns the local path if successful, otherwise None.
    Use this tool AFTER get_sustainability_report_pdf has returned a valid PDF link or if user provides the PDF link.

    Any directory components in 'filename' are discarded so the file is always
    written directly inside 'save_path'. The download is streamed to a temporary
    '.part' file and only moved to its final name once it has completed, so a
    failed download never leaves a truncated PDF behind.

    Args:
        filename (str): The name to save the PDF as (should end with .pdf).
        url (str): The direct URL to the PDF file.
        save_path (str): The directory to save the PDF into (default: "reports").
        overwrite (bool): Whether to overwrite the file if it already exists.

    Returns:
        str | None: The path to the saved file if successful, otherwise None.
    """
    try:
        # The filename is supplied by the agent or the user, so treat it as
        # untrusted: keep only the final path component to prevent writes
        # outside save_path (e.g. "../../etc/x.pdf" or an absolute path).
        safe_name = os.path.basename(filename.replace("\\", "/"))
        if safe_name != filename:
            logger.warning(
                f"Ignoring directory components in filename {filename!r}; using {safe_name!r}"
            )

        if not safe_name.lower().endswith(".pdf"):
            logger.warning(f"Filename does not end with .pdf: {filename}")
            return None

        os.makedirs(save_path, exist_ok=True)
        full_path = os.path.join(save_path, safe_name)

        if os.path.exists(full_path) and not overwrite:
            logger.info(f"File already exists, skipping download: {full_path}")
            return full_path

        logger.info(f"Starting download from {url}")

        # Stream into a sibling temp file first. Writing straight to full_path
        # would leave a partial file on failure, which a later call with
        # overwrite=False would wrongly report as an existing download.
        partial_path = full_path + ".part"
        try:
            # The context manager releases the connection even when
            # raise_for_status() or the streaming loop raises.
            with requests.get(url, stream=True, timeout=20) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            file.write(chunk)
            # Replace the destination in one step once the body is complete.
            os.replace(partial_path, full_path)
        finally:
            # After a successful replace the temp file is gone; otherwise
            # remove whatever was partially written.
            if os.path.exists(partial_path):
                os.remove(partial_path)

        logger.info(f"Successfully downloaded to: {full_path}")
        return full_path
    except requests.exceptions.Timeout:
        logger.error(f"Timeout while downloading {url}")
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error while downloading {url}: {http_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"Request error while downloading {url}: {req_err}")
    except Exception as e:
        # Tool boundary: report the failure to the agent as None rather than raising.
        logger.error(f"Unexpected error: {e}")
    return None