Spaces:

vanderbilt-dsi
/

climate-plan-summary-tool

Running

App Files Community

umangchaudhry committed on Oct 10, 2024

Commit

8e115d0

verified ·

1 Parent(s): 1406a45

Upload 3 files

Browse files

Files changed (1) hide show

app.py +14 -76

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
-import re
 import streamlit as st
 from io import BytesIO
 from tempfile import NamedTemporaryFile
@@ -11,8 +10,17 @@ from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import FAISS
 from langchain_openai import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from xhtml2pdf import pisa
-from markdown import markdown
 # Function to process PDF, run Q&A, and return results
 def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
@@ -71,10 +79,10 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
         result = rag_chain.invoke({"input": question})
         answer = result["answer"]
-        # Clean up the answer
-        answer = clean_answer(answer)
-        qa_text = f"### Question: {question}\n**Answer:**\n\n{answer}\n"
         qa_results.append(qa_text)
         # Update the placeholder with each new Q&A pair
         display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
@@ -84,63 +92,6 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
     return qa_results
-# Function to clean up the AI's answer
-def clean_answer(answer):
-    # Remove unwanted prefixes like 'markdown'
-    answer = answer.strip()
-    if answer.lower().startswith('markdown'):
-        answer = answer[len('markdown'):].strip()
-    # Additional cleaning if necessary
-    # For example, ensure that markdown table syntax is correct
-    return answer
-# Function to convert markdown text to PDF with table support
-def md_to_pdf(md_text):
-    # Convert markdown to HTML with table support
-    html_content = markdown(md_text, output_format='html5', extensions=['tables'])
-    # Define CSS styles for better table rendering
-    css_styles = '''
-    <style>
-    body {
-        font-family: Arial, sans-serif;
-        font-size: 12pt;
-    }
-    table {
-        border-collapse: collapse;
-        width: 100%;
-    }
-    th, td {
-        border: 1px solid black;
-        padding: 8px;
-        text-align: left;
-    }
-    th {
-        background-color: #f2f2f2;
-    }
-    </style>
-    '''
-    # Construct the full HTML with CSS and content
-    html = f'''
-    <html>
-    <head>
-    {css_styles}
-    </head>
-    <body>
-    {html_content}
-    </body>
-    </html>
-    '''
-    # Generate the PDF
-    pdf = BytesIO()
-    pisa_status = pisa.CreatePDF(html, dest=pdf)
-    if pisa_status.err:
-        return None
-    return pdf.getvalue()
 # Streamlit app layout
 st.title("Climate Policy Summary Tool")
@@ -171,18 +122,5 @@ if st.button("Generate") and api_key and uploaded_file:
                 file_name="qa_results.md",
                 mime="text/markdown"
             )
-            # Convert markdown to PDF
-            pdf_bytes = md_to_pdf(markdown_text)
-            if pdf_bytes:
-                st.download_button(
-                    label="Download Results as PDF",
-                    data=pdf_bytes,
-                    file_name="qa_results.pdf",
-                    mime="application/pdf"
-                )
-            else:
-                st.error("Error generating PDF")
         except Exception as e:
             st.error(f"An error occurred: {e}")

 import os
 import streamlit as st
 from io import BytesIO
 from tempfile import NamedTemporaryFile
 from langchain_community.vectorstores import FAISS
 from langchain_openai import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+import re
+# Matches text that consists solely of one fenced code block: an opening
+# fence with an optional info string (e.g. "markdown", "c++"), an optional
+# body, and a closing fence on its own line.
+_CODE_FENCE_RE = re.compile(r"```[^`\n]*\n(?:(.*?)\n)?```", re.DOTALL)
+# Function to remove code block markers from the answer
+def remove_code_blocks(text):
+    """Return *text* without a Markdown code fence that wraps the whole answer.
+
+    Surrounding whitespace is ignored when looking for the fence, so an answer
+    such as "  ```markdown\\n...\\n```\\n\\n" is still unwrapped. Only a fence
+    that encloses the entire answer is removed; fences that appear alongside
+    other text are left alone and *text* is returned unchanged.
+
+    Args:
+        text: The answer string to clean.
+
+    Returns:
+        The stripped body of the fenced block when the whole answer is fenced
+        (an empty string for an empty block), otherwise *text* unchanged.
+    """
+    # Strip first: stray whitespace around the fence would otherwise defeat
+    # the anchored match and leave the markers in the rendered output.
+    match = _CODE_FENCE_RE.fullmatch(text.strip())
+    if match is None:
+        return text
+    # group(1) is None when the fenced block has no body at all.
+    return (match.group(1) or "").strip()
 # Function to process PDF, run Q&A, and return results
 def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
         result = rag_chain.invoke({"input": question})
         answer = result["answer"]
+        # Remove code block markers from the answer
+        answer = remove_code_blocks(answer)
+        qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
         qa_results.append(qa_text)
         # Update the placeholder with each new Q&A pair
         display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
     return qa_results
 # Streamlit app layout
 st.title("Climate Policy Summary Tool")
                 file_name="qa_results.md",
                 mime="text/markdown"
             )
         except Exception as e:
             st.error(f"An error occurred: {e}")