Spaces:

vanderbilt-dsi
/

climate-plan-summary-tool

Running

App Files Community

umangchaudhry committed on Oct 10, 2024

Commit

318c146

verified ·

1 Parent(s): d094df2

Upload 2 files

Browse files

Files changed (2) hide show

app.py +82 -27
summary_tool_system_prompt.md +1 -0

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
 import streamlit as st
 from io import BytesIO
 from tempfile import NamedTemporaryFile
@@ -8,11 +8,11 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_openai import ChatOpenAI
 from langchain_community.document_loaders import PyPDFLoader
-from langchain.vectorstores import FAISS
 from langchain_openai import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from reportlab.lib.pagesizes import letter
-from reportlab.pdfgen import canvas
 # Function to process PDF, run Q&A, and return results
 def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
@@ -70,7 +70,11 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
     for question in questions:
         result = rag_chain.invoke({"input": question})
         answer = result["answer"]
-        qa_text = f"### Question: {question}\n**Answer:** {answer}\n"
         qa_results.append(qa_text)
         # Update the placeholder with each new Q&A pair
         display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
@@ -80,24 +84,62 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
     return qa_results
# Function to create a PDF using reportlab
def create_pdf(content):
    """Render plain text into a letter-sized PDF, one input line per PDF line.

    The text is written verbatim (no Markdown rendering, no line wrapping)
    in 10pt Helvetica. When the cursor drops below the bottom margin a new
    page is started, so long content is no longer drawn off the page.

    Args:
        content: Text to render; split on "\n" into individual lines.

    Returns:
        A BytesIO holding the finished PDF, rewound to position 0.
    """
    left_margin, top_y, bottom_y = 40, 750, 40

    buffer = BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=letter)

    def start_text():
        # showPage() resets the canvas graphics state, so the font has to be
        # set again before each page's text object is created.
        pdf.setFont("Helvetica", 10)
        return pdf.beginText(left_margin, top_y)

    text = start_text()
    for line in content.split("\n"):
        if text.getY() < bottom_y:
            # Current page is full: flush it and continue on a fresh page.
            pdf.drawText(text)
            pdf.showPage()
            text = start_text()
        text.textLine(line)

    pdf.drawText(text)
    pdf.showPage()
    pdf.save()
    buffer.seek(0)
    return buffer
 # Streamlit app layout
 st.title("Climate Policy Summary Tool")
@@ -122,12 +164,25 @@ if st.button("Generate") and api_key and uploaded_file:
             results = process_pdf(api_key, uploaded_file, questions_file_path, prompt_file_path, display_placeholder)
             # Allow the user to download the results as a Markdown file
-            markdown_output = "\n".join(results)
-            st.download_button("Download as Markdown", markdown_output, file_name="results.md")
-            # Create a PDF file for the user to download
-            pdf_output = create_pdf(markdown_output)
-            st.download_button("Download as PDF", data=pdf_output, file_name="results.pdf")
         except Exception as e:
             st.error(f"An error occurred: {e}")

 import os
+import re
 import streamlit as st
 from io import BytesIO
 from tempfile import NamedTemporaryFile
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_openai import ChatOpenAI
 from langchain_community.document_loaders import PyPDFLoader
+from langchain_community.vectorstores import FAISS
 from langchain_openai import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+from xhtml2pdf import pisa
+from markdown import markdown
 # Function to process PDF, run Q&A, and return results
 def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
     for question in questions:
         result = rag_chain.invoke({"input": question})
         answer = result["answer"]
+        # Clean up the answer
+        answer = clean_answer(answer)
+        qa_text = f"### Question: {question}\n**Answer:**\n\n{answer}\n"
         qa_results.append(qa_text)
         # Update the placeholder with each new Q&A pair
         display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
     return qa_results
# Function to clean up the AI's answer
def clean_answer(answer):
    """Strip Markdown wrapper artifacts from a model answer.

    Two artifacts are removed:

    * A single code fence that wraps the entire answer and is either
      unlabeled or labeled ``markdown``/``md``. Left in place, the whole
      answer (tables included) would be rendered as a literal code block.
      Fences labeled with another language, and answers that merely contain
      fenced blocks, are left untouched.
    * A bare leading ``markdown`` token (case-insensitive).

    Args:
        answer: Raw answer text returned by the model.

    Returns:
        The cleaned answer with surrounding whitespace removed.
    """
    answer = answer.strip()

    # Unwrap "```markdown\n...\n```" only when the fence spans the whole answer.
    fenced = re.fullmatch(
        r"```[ \t]*(?:markdown|md)?[ \t]*\r?\n(.*?)\r?\n?```",
        answer,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # An inner "```" means the match spans several separate fenced blocks,
    # not one wrapper, so the answer must be left as it is.
    if fenced and "```" not in fenced.group(1):
        answer = fenced.group(1).strip()

    # Remove unwanted prefixes like 'markdown'
    if answer.lower().startswith('markdown'):
        answer = answer[len('markdown'):].strip()
    return answer
# Function to convert markdown text to PDF with table support
def md_to_pdf(md_text):
    """Render Markdown text as a PDF and return the PDF bytes.

    The Markdown is converted to HTML5 with the ``tables`` extension, placed
    in a small HTML document together with table-friendly CSS, and handed to
    ``pisa.CreatePDF``.

    Args:
        md_text: Markdown source to render.

    Returns:
        The PDF content as bytes, or None when the pisa status reports an
        error.
    """
    # Styling: readable body font plus bordered, full-width tables
    stylesheet = '''
    <style>
    body {
        font-family: Arial, sans-serif;
        font-size: 12pt;
    }
    table {
        border-collapse: collapse;
        width: 100%;
    }
    th, td {
        border: 1px solid black;
        padding: 8px;
        text-align: left;
    }
    th {
        background-color: #f2f2f2;
    }
    </style>
    '''
    # Markdown -> HTML fragment (tables enabled)
    body_html = markdown(md_text, output_format='html5', extensions=['tables'])
    # Assemble the complete document around the fragment
    document = f'''
    <html>
    <head>
    {stylesheet}
    </head>
    <body>
    {body_html}
    </body>
    </html>
    '''
    # Render into an in-memory buffer
    sink = BytesIO()
    outcome = pisa.CreatePDF(document, dest=sink)
    return None if outcome.err else sink.getvalue()
 # Streamlit app layout
 st.title("Climate Policy Summary Tool")
             results = process_pdf(api_key, uploaded_file, questions_file_path, prompt_file_path, display_placeholder)
             # Allow the user to download the results as a Markdown file
+            markdown_text = "\n".join(results)
+            st.download_button(
+                label="Download Results as Markdown",
+                data=markdown_text,
+                file_name="qa_results.md",
+                mime="text/markdown"
+            )
+            # Convert markdown to PDF
+            pdf_bytes = md_to_pdf(markdown_text)
+            if pdf_bytes:
+                st.download_button(
+                    label="Download Results as PDF",
+                    data=pdf_bytes,
+                    file_name="qa_results.pdf",
+                    mime="application/pdf"
+                )
+            else:
+                st.error("Error generating PDF")
         except Exception as e:
             st.error(f"An error occurred: {e}")

summary_tool_system_prompt.md CHANGED Viewed

@@ -6,6 +6,7 @@ Your task is to analyze the plan and answer a consistent set of questions based
 - **Include direct quotations formatted with citations** in the format *(Chapter name, Section header, Page number etc. if applicable)*.
 - **Format all responses using Markdown syntax.**
 - **Responses should be well formatted. Use bold, italics, and bullet points where appropriate.**
 ### **Definitions**

 - **Include direct quotations formatted with citations** in the format *(Chapter name, Section header, Page number etc. if applicable)*.
 - **Format all responses using Markdown syntax.**
 - **Responses should be well formatted. Use bold, italics, and bullet points where appropriate.**
+- **Respond directly to the questions asked. Do not include any other text or comments.**
 ### **Definitions**