umangchaudhry committed on
Commit
8e115d0
·
verified ·
1 Parent(s): 1406a45

Upload 3 files

Browse files
Files changed (1) hide show
  1. app.py +14 -76
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import re
3
  import streamlit as st
4
  from io import BytesIO
5
  from tempfile import NamedTemporaryFile
@@ -11,8 +10,17 @@ from langchain_community.document_loaders import PyPDFLoader
11
  from langchain_community.vectorstores import FAISS
12
  from langchain_openai import OpenAIEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
- from xhtml2pdf import pisa
15
- from markdown import markdown
 
 
 
 
 
 
 
 
 
16
 
17
  # Function to process PDF, run Q&A, and return results
18
  def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
@@ -71,10 +79,10 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
71
  result = rag_chain.invoke({"input": question})
72
  answer = result["answer"]
73
 
74
- # Clean up the answer
75
- answer = clean_answer(answer)
76
 
77
- qa_text = f"### Question: {question}\n**Answer:**\n\n{answer}\n"
78
  qa_results.append(qa_text)
79
  # Update the placeholder with each new Q&A pair
80
  display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
@@ -84,63 +92,6 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
84
 
85
  return qa_results
86
 
87
- # Function to clean up the AI's answer
88
def clean_answer(answer):
    """Trim the model's answer and drop a leading 'markdown' label.

    The answer is stripped of surrounding whitespace. If the stripped text
    begins with the word 'markdown' (case-insensitive), that prefix is
    removed and the remainder is stripped again.

    Args:
        answer: Raw answer string.

    Returns:
        The cleaned answer string.
    """
    prefix = 'markdown'
    trimmed = answer.strip()

    # Guard clause: nothing more to do when there is no stray label.
    if not trimmed.lower().startswith(prefix):
        return trimmed

    return trimmed[len(prefix):].strip()
97
-
98
- # Function to convert markdown text to PDF with table support
99
def md_to_pdf(md_text):
    """Render markdown text to a PDF and return its bytes.

    The markdown is converted to HTML5 with the 'tables' extension enabled,
    wrapped in a minimal HTML document carrying an inline stylesheet, and
    handed to xhtml2pdf's ``pisa.CreatePDF``.

    Args:
        md_text: Markdown source to render.

    Returns:
        The generated PDF as bytes, or None when pisa reports an error.
    """
    # Inline stylesheet: bordered, full-width tables with a shaded header row.
    css_styles = '''
<style>
body {
font-family: Arial, sans-serif;
font-size: 12pt;
}
table {
border-collapse: collapse;
width: 100%;
}
th, td {
border: 1px solid black;
padding: 8px;
text-align: left;
}
th {
background-color: #f2f2f2;
}
</style>
'''

    # Markdown -> HTML fragment, with table syntax enabled.
    html_content = markdown(md_text, output_format='html5', extensions=['tables'])

    # Full document: stylesheet in the head, converted content in the body.
    document = f'''
<html>
<head>
{css_styles}
</head>
<body>
{html_content}
</body>
</html>
'''

    # Write the PDF into an in-memory buffer.
    buffer = BytesIO()
    status = pisa.CreatePDF(document, dest=buffer)
    return None if status.err else buffer.getvalue()
143
-
144
  # Streamlit app layout
145
  st.title("Climate Policy Summary Tool")
146
 
@@ -171,18 +122,5 @@ if st.button("Generate") and api_key and uploaded_file:
171
  file_name="qa_results.md",
172
  mime="text/markdown"
173
  )
174
-
175
- # Convert markdown to PDF
176
- pdf_bytes = md_to_pdf(markdown_text)
177
- if pdf_bytes:
178
- st.download_button(
179
- label="Download Results as PDF",
180
- data=pdf_bytes,
181
- file_name="qa_results.pdf",
182
- mime="application/pdf"
183
- )
184
- else:
185
- st.error("Error generating PDF")
186
-
187
  except Exception as e:
188
  st.error(f"An error occurred: {e}")
 
1
  import os
 
2
  import streamlit as st
3
  from io import BytesIO
4
  from tempfile import NamedTemporaryFile
 
10
  from langchain_community.vectorstores import FAISS
11
  from langchain_openai import OpenAIEmbeddings
12
  from langchain_text_splitters import RecursiveCharacterTextSplitter
13
+ import re
14
+
15
+ # Function to remove code block markers from the answer
16
def remove_code_blocks(text):
    """Unwrap an answer that is entirely enclosed in one fenced code block.

    If the text, ignoring surrounding whitespace, starts with a fence line
    (three backticks plus an optional info string such as ``markdown`` or
    ``c++``) and ends with a closing fence, the fences are removed and the
    inner content is returned stripped. Otherwise the text is returned
    unchanged.

    Args:
        text: Answer string that may be wrapped in a fenced code block.

    Returns:
        The unwrapped, stripped content when the whole text is a fenced
        block; otherwise the original ``text`` exactly as passed in.
    """
    # The opening fence may carry any info string without backticks
    # (e.g. "c++", or a tag followed by a space); CRLF endings are accepted.
    code_block_pattern = r"^```[^\n`]*\r?\n(.*?)\r?\n```$"

    # Match against the stripped text so stray leading or trailing
    # whitespace around the fences does not defeat the anchors.
    match = re.match(code_block_pattern, text.strip(), re.DOTALL)
    if match:
        return match.group(1).strip()
    return text
24
 
25
  # Function to process PDF, run Q&A, and return results
26
  def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
 
79
  result = rag_chain.invoke({"input": question})
80
  answer = result["answer"]
81
 
82
+ # Remove code block markers from the answer
83
+ answer = remove_code_blocks(answer)
84
 
85
+ qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
86
  qa_results.append(qa_text)
87
  # Update the placeholder with each new Q&A pair
88
  display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
 
92
 
93
  return qa_results
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # Streamlit app layout
96
  st.title("Climate Policy Summary Tool")
97
 
 
122
  file_name="qa_results.md",
123
  mime="text/markdown"
124
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  except Exception as e:
126
  st.error(f"An error occurred: {e}")