ludigija committed on
Commit
b4b9a80
·
verified ·
1 Parent(s): 3424a59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -35
app.py CHANGED
@@ -50,24 +50,57 @@ def extract_text_from_pdf(uploaded_file):
50
  st.error(f"PDF extraction error: {str(e)}")
51
  return ""
52
 
53
def highlight_differences(text1, text2):
    """Return one HTML string highlighting word-level differences.

    Both texts are split on whitespace and compared with ``difflib.Differ``.
    Words only in ``text1`` are wrapped in a red span, words only in
    ``text2`` in a green span, and shared words are emitted unchanged.
    Every word is followed by a single space.

    Args:
        text1: Text of the first document.
        text2: Text of the second document.

    Returns:
        The HTML fragment, or ``""`` when either input is empty/falsy.
    """
    # Local import so this block does not depend on the file's import header.
    import html

    if not text1 or not text2:
        return ""

    differ = difflib.Differ()
    pieces = []
    for entry in differ.compare(text1.split(), text2.split()):
        marker = entry[:2]
        if marker == "? ":
            # Hint lines are difflib guide markers, not text from either
            # document, so they must not be rendered as content.
            continue
        # The words come from uploaded documents and the result is rendered
        # as raw HTML, so markup characters have to be escaped.
        word = html.escape(entry[2:])
        if marker == "- ":
            pieces.append(f'<span style="background-color:#ffcccc">{word}</span> ')
        elif marker == "+ ":
            pieces.append(f'<span style="background-color:#ccffcc">{word}</span> ')
        else:
            pieces.append(word + " ")
    return "".join(pieces)
 
 
 
 
 
 
 
 
71
 
72
  def calculate_similarity(text1, text2):
73
  if not text1.strip() or not text2.strip():
@@ -144,14 +177,14 @@ def main():
144
  # Update document displays
145
  if uploaded_file1:
146
  doc1_display.text_area("Document 1 Content",
147
- value=contract_text1,
148
- height=200,
149
- key="area1")
150
  if uploaded_file2:
151
  doc2_display.text_area("Document 2 Content",
152
- value=contract_text2,
153
- height=200,
154
- key="area2")
155
 
156
  if not (uploaded_file1 and uploaded_file2):
157
  st.warning("Please upload both documents to proceed")
@@ -159,6 +192,11 @@ def main():
159
 
160
  # ===== DOCUMENT COMPARISON SECTION =====
161
  st.header("2. Document Comparison")
 
 
 
 
 
162
  with st.expander("Show Document Differences", expanded=True):
163
  if st.button("Compare Documents"):
164
  with st.spinner("Analyzing documents..."):
@@ -167,27 +205,45 @@ def main():
167
  return
168
 
169
  similarity_score = calculate_similarity(contract_text1, contract_text2)
170
- highlighted_diff = highlight_differences(contract_text1, contract_text2)
171
 
172
- # Store results in session state
173
- st.session_state.comparison_results = {
174
- 'similarity_score': similarity_score,
175
- 'highlighted_diff': highlighted_diff
176
- }
177
-
178
- # Display comparison results if they exist
 
 
 
 
 
 
 
 
 
 
179
  if st.session_state.comparison_results:
180
  st.metric("Document Similarity Score",
181
- f"{st.session_state.comparison_results['similarity_score']:.2f}%")
182
 
183
  if st.session_state.comparison_results['similarity_score'] < 50:
184
  st.warning("Significant differences detected")
185
 
186
  st.markdown("**Visual Difference Highlighting:**")
187
- st.markdown(
188
- f'<div style="border:1px solid #ddd; padding:10px; max-height:400px; overflow-y:auto;">{st.session_state.comparison_results["highlighted_diff"]}</div>',
189
- unsafe_allow_html=True
190
- )
 
 
 
 
 
 
 
 
 
191
 
192
  # ===== QUESTION ANALYSIS SECTION =====
193
  st.header("3. Clause Analysis")
@@ -235,8 +291,8 @@ def main():
235
  except Exception as e:
236
  st.session_state.analysis_results = st.session_state.analysis_results or {}
237
  st.session_state.analysis_results['doc2'] = f"Analysis failed: {str(e)}"
238
-
239
- # Display analysis results if they exist
240
  if st.session_state.analysis_results:
241
  col1, col2 = st.columns(2)
242
  with col1:
@@ -248,4 +304,4 @@ def main():
248
  st.success(st.session_state.analysis_results.get('doc2', 'No analysis performed yet'))
249
 
250
  if __name__ == "__main__":
251
- main()
 
50
  st.error(f"PDF extraction error: {str(e)}")
51
  return ""
52
 
53
def highlight_differences_side_by_side(text1, text2):
    """Build two HTML fragments that highlight line-level differences.

    The texts are split into lines (line endings kept) and compared with
    ``difflib.Differ``. Lines only in ``text1`` are wrapped in a red span in
    the first fragment, lines only in ``text2`` in a green span in the second
    fragment, and shared lines are copied to both.

    Args:
        text1: Text of the original document; ``None`` is treated as empty.
        text2: Text of the modified document; ``None`` is treated as empty.

    Returns:
        A ``(html_for_text1, html_for_text2)`` tuple of strings.
    """
    # Local import so this block does not depend on the file's import header.
    import html

    differ = difflib.Differ()
    original_lines = (text1 or "").splitlines(keepends=True)
    modified_lines = (text2 or "").splitlines(keepends=True)

    left_parts = []
    right_parts = []
    for entry in differ.compare(original_lines, modified_lines):
        marker = entry[:2]
        # The result is rendered as raw HTML, so document text is escaped to
        # keep markup characters from being interpreted by the browser.
        content = html.escape(entry[2:])
        if marker == "- ":
            left_parts.append(
                f'<span style="background-color:#ffcccc">{content}</span>'
            )
        elif marker == "+ ":
            right_parts.append(
                f'<span style="background-color:#ccffcc">{content}</span>'
            )
        elif marker == "  ":
            left_parts.append(content)
            right_parts.append(content)
        # "? " hint lines belong to neither document and are skipped.

    return "".join(left_parts), "".join(right_parts)
70
+
71
def highlight_differences_words(text1, text2):
    """Build two HTML fragments that highlight word-level differences.

    Both texts are split on whitespace and compared with ``difflib.Differ``.
    In the first fragment, words missing from ``text2`` are wrapped in a red
    span. In the second fragment, a word that directly replaces a removed
    word is wrapped in a yellow span and any other added word in a green
    span. Shared words are copied to both fragments. Every word, highlighted
    or not, is followed by a single space.

    Args:
        text1: Text of the original document; ``None`` is treated as empty.
        text2: Text of the modified document; ``None`` is treated as empty.

    Returns:
        A ``(html_for_text1, html_for_text2)`` tuple of strings. Callers
        must unpack it; it is not a single HTML string.
    """
    # Local import so this block does not depend on the file's import header.
    import html

    def _span(color, word):
        # Wrap one escaped word in the inline-block highlight span.
        return (
            f'<span style="background-color:{color}; display: inline-block;">'
            f"{html.escape(word)}</span>"
        )

    differ = difflib.Differ()
    # Drop "? " hint lines up front: they are not document text, and they sit
    # between a "- " entry and its "+ " replacement, which would otherwise
    # stop similar words from being paired as a change.
    entries = [
        entry
        for entry in differ.compare((text1 or "").split(), (text2 or "").split())
        if not entry.startswith("? ")
    ]

    left_words = []
    right_words = []
    position = 0
    total = len(entries)
    while position < total:
        marker = entries[position][:2]
        word = entries[position][2:]
        if marker == "- ":
            left_words.append(_span("#ffcccc", word))
            follower = position + 1
            if follower < total and entries[follower].startswith("+ "):
                # A removal directly followed by an addition is a changed
                # word; consume the addition here so it is not emitted twice.
                right_words.append(_span("#ffffcc", entries[follower][2:]))
                position = follower
        elif marker == "+ ":
            right_words.append(_span("#ccffcc", word))
        else:
            shared = html.escape(word)
            left_words.append(shared)
            right_words.append(shared)
        position += 1

    return (
        "".join(f"{piece} " for piece in left_words),
        "".join(f"{piece} " for piece in right_words),
    )
104
 
105
  def calculate_similarity(text1, text2):
106
  if not text1.strip() or not text2.strip():
 
177
  # Update document displays
178
  if uploaded_file1:
179
  doc1_display.text_area("Document 1 Content",
180
+ value=contract_text1,
181
+ height=200,
182
+ key="area1")
183
  if uploaded_file2:
184
  doc2_display.text_area("Document 2 Content",
185
+ value=contract_text2,
186
+ height=200,
187
+ key="area2")
188
 
189
  if not (uploaded_file1 and uploaded_file2):
190
  st.warning("Please upload both documents to proceed")
 
192
 
193
  # ===== DOCUMENT COMPARISON SECTION =====
194
  st.header("2. Document Comparison")
195
+ comparison_format = st.radio(
196
+ "Comparison Output Format",
197
+ ["Side by Side", "Combined View"],
198
+ index=0
199
+ )
200
  with st.expander("Show Document Differences", expanded=True):
201
  if st.button("Compare Documents"):
202
  with st.spinner("Analyzing documents..."):
 
205
  return
206
 
207
  similarity_score = calculate_similarity(contract_text1, contract_text2)
 
208
 
209
+ if comparison_format == "Side by Side":
210
+ highlighted_diff1, highlighted_diff2 = highlight_differences_side_by_side(contract_text1, contract_text2)
211
+ st.session_state.comparison_results = {
212
+ 'similarity_score': similarity_score,
213
+ 'highlighted_diff1': highlighted_diff1,
214
+ 'highlighted_diff2': highlighted_diff2,
215
+ 'format': 'side_by_side'
216
+ }
217
+ else:
218
+ highlighted_diff = highlight_differences_words(contract_text1, contract_text2)
219
+ st.session_state.comparison_results = {
220
+ 'similarity_score': similarity_score,
221
+ 'highlighted_diff': highlighted_diff,
222
+ 'format': 'combined'
223
+ }
224
+
225
+ # Display comparison results
226
  if st.session_state.comparison_results:
227
  st.metric("Document Similarity Score",
228
+ f"{st.session_state.comparison_results['similarity_score']:.2f}%")
229
 
230
  if st.session_state.comparison_results['similarity_score'] < 50:
231
  st.warning("Significant differences detected")
232
 
233
  st.markdown("**Visual Difference Highlighting:**")
234
+ if st.session_state.comparison_results['format'] == 'side_by_side':
235
+ col1, col2 = st.columns(2)
236
+ with col1:
237
+ st.markdown("### Original Document")
238
+ st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em;">{st.session_state.comparison_results["highlighted_diff1"]}</div>', unsafe_allow_html=True)
239
+ with col2:
240
+ st.markdown("### Modified Document")
241
+ st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em;">{st.session_state.comparison_results["highlighted_diff2"]}</div>', unsafe_allow_html=True)
242
+ else:
243
+ st.markdown(
244
+ f'<div style="border:1px solid #ddd; padding:10px; max-height:400px; overflow-y:auto;">{st.session_state.comparison_results["highlighted_diff"]}</div>',
245
+ unsafe_allow_html=True
246
+ )
247
 
248
  # ===== QUESTION ANALYSIS SECTION =====
249
  st.header("3. Clause Analysis")
 
291
  except Exception as e:
292
  st.session_state.analysis_results = st.session_state.analysis_results or {}
293
  st.session_state.analysis_results['doc2'] = f"Analysis failed: {str(e)}"
294
+
295
+ # Display analysis results
296
  if st.session_state.analysis_results:
297
  col1, col2 = st.columns(2)
298
  with col1:
 
304
  st.success(st.session_state.analysis_results.get('doc2', 'No analysis performed yet'))
305
 
306
  if __name__ == "__main__":
307
+ main()