Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -50,23 +50,7 @@ def extract_text_from_pdf(uploaded_file):
|
|
50 |
st.error(f"PDF extraction error: {str(e)}")
|
51 |
return ""
|
52 |
|
53 |
-
def highlight_differences_side_by_side(text1, text2):
|
54 |
-
differ = difflib.Differ()
|
55 |
-
diff = list(differ.compare(text1.splitlines(keepends=True), text2.splitlines(keepends=True)))
|
56 |
|
57 |
-
highlighted_text1 = ""
|
58 |
-
highlighted_text2 = ""
|
59 |
-
|
60 |
-
for line in diff:
|
61 |
-
if line.startswith("- "):
|
62 |
-
highlighted_text1 += f'<span style="background-color:#ffcccc">{line[2:]}</span>'
|
63 |
-
elif line.startswith("+ "):
|
64 |
-
highlighted_text2 += f'<span style="background-color:#ccffcc">{line[2:]}</span>'
|
65 |
-
elif line.startswith(" "):
|
66 |
-
highlighted_text1 += line[2:]
|
67 |
-
highlighted_text2 += line[2:]
|
68 |
-
|
69 |
-
return highlighted_text1, highlighted_text2
|
70 |
|
71 |
def highlight_differences_words(text1, text2):
|
72 |
differ = difflib.Differ()
|
@@ -82,8 +66,8 @@ def highlight_differences_words(text1, text2):
|
|
82 |
# Check for corresponding addition to highlight as changed
|
83 |
if i + 1 < len(diff) and diff[i + 1].startswith("+ "):
|
84 |
added_word = diff[i + 1][2:]
|
85 |
-
highlighted_text2 += f'<span style="background-color:#ffffcc; display: inline-block;">{added_word}</span>'
|
86 |
-
diff[i + 1] = ' '
|
87 |
else:
|
88 |
highlighted_text2 += " "
|
89 |
elif word.startswith("+ "):
|
@@ -91,7 +75,7 @@ def highlight_differences_words(text1, text2):
|
|
91 |
highlighted_text2 += f'<span style="background-color:#ccffcc; display: inline-block;">{added_word}</span>'
|
92 |
# Check for corresponding removal
|
93 |
if i - 1 >= 0 and diff[i - 1].startswith("- "):
|
94 |
-
highlighted_text1 += f'<span style="background-color:#ffffcc; display: inline-block;">{diff[i-1][2:]}</span>'
|
95 |
diff[i-1] = ' '
|
96 |
else:
|
97 |
highlighted_text1 += " "
|
@@ -101,7 +85,6 @@ def highlight_differences_words(text1, text2):
|
|
101 |
highlighted_text2 += word[2:] + " "
|
102 |
|
103 |
return highlighted_text1, highlighted_text2
|
104 |
-
|
105 |
def calculate_similarity(text1, text2):
|
106 |
if not text1.strip() or not text2.strip():
|
107 |
return 0.0
|
@@ -206,21 +189,15 @@ def main():
|
|
206 |
|
207 |
similarity_score = calculate_similarity(contract_text1, contract_text2)
|
208 |
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
highlighted_diff = highlight_differences_words(contract_text1, contract_text2)
|
219 |
-
st.session_state.comparison_results = {
|
220 |
-
'similarity_score': similarity_score,
|
221 |
-
'highlighted_diff': highlighted_diff,
|
222 |
-
'format': 'combined'
|
223 |
-
}
|
224 |
|
225 |
# Display comparison results
|
226 |
if st.session_state.comparison_results:
|
@@ -231,19 +208,15 @@ def main():
|
|
231 |
st.warning("Significant differences detected")
|
232 |
|
233 |
st.markdown("**Visual Difference Highlighting:**")
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
st.markdown(
|
244 |
-
f'<div style="border:1px solid #ddd; padding:10px; max-height:400px; overflow-y:auto;">{st.session_state.comparison_results["highlighted_diff"]}</div>',
|
245 |
-
unsafe_allow_html=True
|
246 |
-
)
|
247 |
|
248 |
# ===== QUESTION ANALYSIS SECTION =====
|
249 |
st.header("3. Clause Analysis")
|
|
|
50 |
st.error(f"PDF extraction error: {str(e)}")
|
51 |
return ""
|
52 |
|
|
|
|
|
|
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
def highlight_differences_words(text1, text2):
|
56 |
differ = difflib.Differ()
|
|
|
66 |
# Check for corresponding addition to highlight as changed
|
67 |
if i + 1 < len(diff) and diff[i + 1].startswith("+ "):
|
68 |
added_word = diff[i + 1][2:]
|
69 |
+
highlighted_text2 += f'<span style="background-color:#ffffcc; display: inline-block;">{added_word}</span>' # Yellow for changed in text2
|
70 |
+
diff[i + 1] = ' ' # Consume the addition
|
71 |
else:
|
72 |
highlighted_text2 += " "
|
73 |
elif word.startswith("+ "):
|
|
|
75 |
highlighted_text2 += f'<span style="background-color:#ccffcc; display: inline-block;">{added_word}</span>'
|
76 |
# Check for corresponding removal
|
77 |
if i - 1 >= 0 and diff[i - 1].startswith("- "):
|
78 |
+
highlighted_text1 += f'<span style="background-color:#ffffcc; display: inline-block;">{diff[i-1][2:]}</span>' # Yellow for changed in text1
|
79 |
diff[i-1] = ' '
|
80 |
else:
|
81 |
highlighted_text1 += " "
|
|
|
85 |
highlighted_text2 += word[2:] + " "
|
86 |
|
87 |
return highlighted_text1, highlighted_text2
|
|
|
88 |
def calculate_similarity(text1, text2):
|
89 |
if not text1.strip() or not text2.strip():
|
90 |
return 0.0
|
|
|
189 |
|
190 |
similarity_score = calculate_similarity(contract_text1, contract_text2)
|
191 |
|
192 |
+
|
193 |
+
highlighted_diff1, highlighted_diff2 = highlight_differences_words(contract_text1, contract_text2)
|
194 |
+
st.session_state.comparison_results = {
|
195 |
+
'similarity_score': similarity_score,
|
196 |
+
'highlighted_diff1': highlighted_diff1,
|
197 |
+
'highlighted_diff2': highlighted_diff2,
|
198 |
+
'format': 'side_by_side'
|
199 |
+
}
|
200 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
# Display comparison results
|
203 |
if st.session_state.comparison_results:
|
|
|
208 |
st.warning("Significant differences detected")
|
209 |
|
210 |
st.markdown("**Visual Difference Highlighting:**")
|
211 |
+
|
212 |
+
col1, col2 = st.columns(2)
|
213 |
+
with col1:
|
214 |
+
st.markdown("### Original Document")
|
215 |
+
st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em;">{st.session_state.comparison_results["highlighted_diff1"]}</div>', unsafe_allow_html=True)
|
216 |
+
with col2:
|
217 |
+
st.markdown("### Modified Document")
|
218 |
+
st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em;">{st.session_state.comparison_results["highlighted_diff2"]}</div>', unsafe_allow_html=True)
|
219 |
+
|
|
|
|
|
|
|
|
|
220 |
|
221 |
# ===== QUESTION ANALYSIS SECTION =====
|
222 |
st.header("3. Clause Analysis")
|