import os

import requests

from application.utils.logger import get_logger
from langchain_core.tools import tool

logger = get_logger()


@tool
def download_pdf(
    filename: str,
    url: str,
    save_path: str = "reports",
    overwrite: bool = False,
) -> str | None:
    """
    Downloads a PDF file from a given URL ('pdf_link') and saves it locally
    with the specified 'filename'. Returns the local path if successful,
    otherwise None. Use this tool AFTER get_sustainability_report_pdf has
    returned a valid PDF link or if user provides the PDF link.

    Args:
        filename (str): The name to save the PDF as (should end with .pdf).
            Any directory components are stripped; only the base name is used.
        url (str): The direct URL to the PDF file.
        save_path (str): The directory to save the PDF into (default: "reports").
        overwrite (bool): Whether to overwrite the file if it already exists.

    Returns:
        str | None: The path to the saved file if successful, otherwise None.
    """
    tmp_path = None
    try:
        # filename may come from an LLM or end user; keep only the final
        # path component so e.g. "../../evil.pdf" cannot escape save_path.
        filename = os.path.basename(filename)
        if not filename.lower().endswith(".pdf"):
            # NOTE: the check is on the filename, so the message says so
            # (previously it misleadingly blamed the URL).
            logger.warning(f"Filename does not end with .pdf: {filename}")
            return None

        os.makedirs(save_path, exist_ok=True)
        full_path = os.path.join(save_path, filename)

        if os.path.exists(full_path) and not overwrite:
            logger.info(f"File already exists, skipping download: {full_path}")
            return full_path

        logger.info(f"Starting download from {url}")
        # Stream into a temp file first so a failed/interrupted download
        # never leaves a truncated file at full_path (and never clobbers
        # a previously good file when overwrite=True).
        tmp_path = full_path + ".part"
        # Context manager releases the streamed connection back to the pool.
        with requests.get(url, stream=True, timeout=20) as response:
            response.raise_for_status()
            with open(tmp_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)

        # Atomic-on-POSIX move into the final location.
        os.replace(tmp_path, full_path)
        logger.info(f"Successfully downloaded to: {full_path}")
        return full_path

    except requests.exceptions.Timeout:
        logger.error(f"Timeout while downloading {url}")
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error while downloading {url}: {http_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"Request error while downloading {url}: {req_err}")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
    finally:
        # Best-effort cleanup of any leftover partial file. On success the
        # temp file has already been renamed away, so this is a no-op.
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
    return None