Spaces:
Sleeping
Sleeping
import os | |
import requests | |
from application.utils.logger import get_logger | |
from langchain_core.tools import tool | |
logger = get_logger() | |
def download_pdf(filename: str, url: str, save_path: str = "reports", overwrite: bool = False):
    """
    Downloads a PDF file from a given URL ('pdf_link') and saves it locally
    with the specified 'filename'. Returns the local path if successful, otherwise None.
    Use this tool AFTER get_sustainability_report_pdf has returned a valid PDF link or if user provides the PDF link.

    The body is streamed to a temporary ``<filename>.part`` file and moved into
    place only once the download has completed, so an interrupted transfer
    never leaves a truncated PDF at the final path.

    Args:
        filename (str): The name to save the PDF as (must end with .pdf).
        url (str): The direct URL to the PDF file.
        save_path (str): The directory to save the PDF into (default: "reports").
            It is created if it does not exist.
        overwrite (bool): Whether to overwrite the file if it already exists.
            When False and the file exists, no request is made and the
            existing path is returned.

    Returns:
        str | None: The path to the saved file if successful, otherwise None.
        All errors are logged and reported as None rather than raised.
    """
    try:
        # Only the target filename is validated; the URL itself is not inspected.
        if not filename.lower().endswith(".pdf"):
            logger.warning(f"Filename must end with .pdf: {filename}")
            return None

        os.makedirs(save_path, exist_ok=True)
        # NOTE(review): filename is joined as-is, so a value containing path
        # separators or ".." can resolve outside save_path -- confirm callers
        # only pass plain file names.
        full_path = os.path.join(save_path, filename)

        if os.path.exists(full_path) and not overwrite:
            logger.info(f"File already exists, skipping download: {full_path}")
            return full_path

        logger.info(f"Starting download from {url}")
        partial_path = full_path + ".part"
        try:
            # The context manager releases the streamed connection even when
            # raise_for_status() or a write fails part-way through.
            with requests.get(url, stream=True, timeout=20) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            file.write(chunk)
            # Publish the file only after the whole body has been written.
            os.replace(partial_path, full_path)
        finally:
            # After a successful os.replace the temporary file no longer
            # exists; if it is still here the download failed, so discard it.
            if os.path.exists(partial_path):
                try:
                    os.remove(partial_path)
                except OSError as cleanup_err:
                    logger.warning(
                        f"Could not remove partial download {partial_path}: {cleanup_err}"
                    )

        logger.info(f"Successfully downloaded to: {full_path}")
        return full_path
    except requests.exceptions.Timeout:
        logger.error(f"Timeout while downloading {url}")
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP error while downloading {url}: {http_err}")
    except requests.exceptions.RequestException as req_err:
        logger.error(f"Request error while downloading {url}: {req_err}")
    except Exception as e:
        # Last-resort boundary (e.g. filesystem errors): log and report failure.
        logger.error(f"Unexpected error: {e}")
    return None