"""Download two PDF reports, summarize each with an LLM, and publish the result.

Pipeline (see ``main``): download both PDFs, extract their text, ask the model
for a structured report per document, render both reports into one PDF, and
upload that PDF to Cloudinary.
"""
import os

import cloudinary
import cloudinary.uploader
import fitz
import requests
from fpdf import FPDF
from openai import OpenAI


def download_pdf(url, save_path, timeout=30):
    """Download the resource at *url* and write its bytes to *save_path*.

    Args:
        url: Address of the PDF to fetch.
        save_path: Local file path the response body is written to.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the pipeline forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Without this check an error page would be saved as a ".pdf".
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open(save_path, "wb") as f:
        f.write(response.content)


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def generate_structured_summary(text):
    """Ask the chat model to turn *text* into a structured report.

    The request goes to the OpenRouter endpoint using the
    ``deepseek/deepseek-r1:free`` model.

    Args:
        text: Raw source text; it is appended verbatim to the prompt.

    Returns:
        The ``content`` of the first completion choice. It is returned
        unchanged, so an empty or missing value is passed on to the caller.
    """
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        # SECURITY: a credential committed to source control should be treated
        # as leaked. Set OPENROUTER_API_KEY instead; the literal below is kept
        # only so existing deployments keep working.
        # TODO: rotate this key and remove the fallback.
        api_key=os.environ.get(
            "OPENROUTER_API_KEY",
            "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",
        ),
    )

    prompt = (
        "Generate a professional structured report from the following text. "
        "The output should follow this format:\n\n"
        "**Report Title**\n"
        "**1. Introduction**\n"
        "**2. Key Findings** (Use bullet points)\n"
        "**3. Analysis** (Include structured paragraphs)\n"
        "**4. Recommendations** (Bullet points of suggestions)\n"
        "**5. Appendix (if applicable)**\n\n"
        "Do NOT include additional instructions on formatting.\n"
        "Dont add conclusions or any ending text!"
        f"\n\n{text}"
    )

    completion = client.chat.completions.create(
        extra_body={},
        model="deepseek/deepseek-r1:free",
        messages=[
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content


def _latin1_safe(text):
    """Return *text* with every character outside Latin-1 replaced by ``?``."""
    return text.encode("latin-1", "replace").decode("latin-1")


def save_structured_pdf(structured_text, output_pdf):
    """Render *structured_text* line by line into a PDF saved at *output_pdf*.

    Args:
        structured_text: Text to render; it is split on ``"\\n"`` and each
            piece is written as its own multi-cell. Each line is also echoed
            to stdout.
        output_pdf: Destination path of the generated PDF.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in structured_text.split("\n"):
        print(line)
        # The built-in "Arial" core font only covers Latin-1. Model output
        # often contains bullets, dashes or curly quotes that would make FPDF
        # fail on encoding, so those characters are replaced first.
        pdf.multi_cell(0, 10, _latin1_safe(line))

    pdf.output(output_pdf)


def upload_to_cloudinary(file_path):
    """Upload the file at *file_path* to Cloudinary and return its HTTPS URL.

    Args:
        file_path: Local path of the file to upload.

    Returns:
        The ``secure_url`` entry of the upload response, or ``None`` when the
        response has no such key. The full response is printed to stdout.
    """
    # SECURITY: these credentials are committed to source control and should
    # be treated as leaked. Each value can be overridden through the
    # environment; the literals remain only for backward compatibility.
    # TODO: rotate the secret and remove the fallbacks.
    cloudinary.config(
        cloud_name=os.environ.get("CLOUDINARY_CLOUD_NAME", "dfdu3nobj"),
        api_key=os.environ.get("CLOUDINARY_API_KEY", "521777423999182"),
        api_secret=os.environ.get(
            "CLOUDINARY_API_SECRET", "cYnNmTOJahnLxTz80wrzzXuvZ88"
        ),
    )

    # Upload the file the caller asked for. Previously a hard-coded
    # "output.pdf" was uploaded regardless of the argument.
    upload_result = cloudinary.uploader.upload(
        file_path,
        resource_type="image",
        type="upload",
        access_mode="public",
    )
    print(upload_result)

    return upload_result.get("secure_url")


def main(pdf1_url, pdf2_url, output_pdf):
    """Run the full pipeline for two source PDFs.

    Downloads both PDFs, extracts their text, generates one structured report
    per document, writes both reports into a single PDF and uploads it.

    Args:
        pdf1_url: URL of the first PDF; saved locally as
            ``Final_Insurance_Report.pdf``.
        pdf2_url: URL of the second PDF; saved locally as
            ``Final_Service_Centre_Report.pdf``.
        output_pdf: Path of the combined report PDF to create and upload.

    Returns:
        The URL returned by ``upload_to_cloudinary`` for the uploaded report.
    """
    pdf1_path = "Final_Insurance_Report.pdf"
    pdf2_path = "Final_Service_Centre_Report.pdf"

    download_pdf(pdf1_url, pdf1_path)
    download_pdf(pdf2_url, pdf2_path)

    text1 = extract_text_from_pdf(pdf1_path)
    text2 = extract_text_from_pdf(pdf2_path)

    structured_summary1 = generate_structured_summary(text1)
    structured_summary2 = generate_structured_summary(text2)

    # Separate the two reports with a real newline. The original joined them
    # with a literal "n", gluing the last line of the first report to the
    # title of the second.
    full_summary = f"{structured_summary1}\n{structured_summary2}"

    save_structured_pdf(full_summary, output_pdf)
    print(f"Structured summaries saved to {output_pdf}")

    pdf_url = upload_to_cloudinary(output_pdf)
    print(f"PDF uploaded to: {pdf_url}")
    return pdf_url