Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -44,8 +44,17 @@ def load_questions_short():
|
|
44 |
def extract_text_from_pdf(uploaded_file):
|
45 |
try:
|
46 |
with pdfplumber.open(uploaded_file) as pdf:
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
except Exception as e:
|
50 |
st.error(f"PDF extraction error: {str(e)}")
|
51 |
return ""
|
@@ -110,7 +119,12 @@ def load_contract(file):
|
|
110 |
if not content:
|
111 |
# Fallback to PyPDF4
|
112 |
pdfReader = PyPDF4.PdfFileReader(file)
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
114 |
elif ext == 'docx':
|
115 |
content = docx2txt.process(file)
|
116 |
else:
|
@@ -161,12 +175,12 @@ def main():
|
|
161 |
if uploaded_file1:
|
162 |
doc1_display.text_area("Document 1 Content",
|
163 |
value=contract_text1,
|
164 |
-
height=
|
165 |
key="area1")
|
166 |
if uploaded_file2:
|
167 |
doc2_display.text_area("Document 2 Content",
|
168 |
value=contract_text2,
|
169 |
-
height=
|
170 |
key="area2")
|
171 |
|
172 |
if not (uploaded_file1 and uploaded_file2):
|
@@ -208,10 +222,10 @@ def main():
|
|
208 |
col1, col2 = st.columns(2)
|
209 |
with col1:
|
210 |
st.markdown("### Original Document")
|
211 |
-
st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em;">{st.session_state.comparison_results["highlighted_diff1"]}</div>', unsafe_allow_html=True)
|
212 |
with col2:
|
213 |
st.markdown("### Modified Document")
|
214 |
-
st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em;">{st.session_state.comparison_results["highlighted_diff2"]}</div>', unsafe_allow_html=True)
|
215 |
|
216 |
|
217 |
# ===== QUESTION ANALYSIS SECTION =====
|
|
|
44 |
def extract_text_from_pdf(uploaded_file):
    """Extract the text of every page of a PDF and return it as one string.

    Each page's text is followed by a blank-line separator ("\n\n"). Pages
    that yield no text contribute only the separator, so one empty page no
    longer aborts extraction of the whole document.

    Args:
        uploaded_file: Whatever ``pdfplumber.open`` accepts (presumably the
            Streamlit uploaded-file object -- confirm against the caller).

    Returns:
        The concatenated page text, or "" when the document contains no
        non-whitespace text or when extraction fails. Failures are reported
        to the user through ``st.error`` rather than raised.
    """
    try:
        with pdfplumber.open(uploaded_file) as pdf:
            page_texts = []
            for page in pdf.pages:
                try:
                    # NOTE(review): `extract_text_formatted` does not appear to
                    # be part of pdfplumber's documented Page API; if it is
                    # absent, the AttributeError fallback below always runs.
                    text = page.extract_text_formatted()
                except AttributeError:
                    text = page.extract_text()
                if not text:
                    # Retry with the plain extractor, and coerce a missing
                    # result to "" so that `None + "\n\n"` cannot raise
                    # TypeError and discard the text of all other pages.
                    text = page.extract_text() or ""
                page_texts.append(text + "\n\n")  # "\n\n" separates pages
            full_text = "".join(page_texts)
            return full_text if full_text.strip() else ""
    except Exception as e:
        # UI boundary: show the problem to the user and degrade to "no text".
        st.error(f"PDF extraction error: {str(e)}")
        return ""
|
|
|
119 |
if not content:
|
120 |
# Fallback to PyPDF4
|
121 |
pdfReader = PyPDF4.PdfFileReader(file)
|
122 |
+
full_text = ""
|
123 |
+
for page in pdfReader.pages:
|
124 |
+
text = page.extractText()
|
125 |
+
if text:
|
126 |
+
full_text += text + "\n\n"
|
127 |
+
content = full_text
|
128 |
elif ext == 'docx':
|
129 |
content = docx2txt.process(file)
|
130 |
else:
|
|
|
175 |
if uploaded_file1:
|
176 |
doc1_display.text_area("Document 1 Content",
|
177 |
value=contract_text1,
|
178 |
+
height=400, # Increased height for larger display
|
179 |
key="area1")
|
180 |
if uploaded_file2:
|
181 |
doc2_display.text_area("Document 2 Content",
|
182 |
value=contract_text2,
|
183 |
+
height=400, # Increased height for larger display
|
184 |
key="area2")
|
185 |
|
186 |
if not (uploaded_file1 and uploaded_file2):
|
|
|
222 |
col1, col2 = st.columns(2)
|
223 |
with col1:
|
224 |
st.markdown("### Original Document")
|
225 |
+
st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;">{st.session_state.comparison_results["highlighted_diff1"]}</div>', unsafe_allow_html=True)
|
226 |
with col2:
|
227 |
st.markdown("### Modified Document")
|
228 |
+
st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;">{st.session_state.comparison_results["highlighted_diff2"]}</div>', unsafe_allow_html=True)
|
229 |
|
230 |
|
231 |
# ===== QUESTION ANALYSIS SECTION =====
|