ludigija committed on
Commit
4665d41
·
verified ·
1 Parent(s): 446457d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -47
app.py CHANGED
@@ -7,6 +7,7 @@ import pdfplumber
7
  import difflib
8
  from sklearn.feature_extraction.text import TfidfVectorizer
9
  from sklearn.metrics.pairwise import cosine_similarity
 
10
 
11
  # ========== CONFIGURATION ==========
12
  st.set_page_config(
@@ -97,7 +98,7 @@ def highlight_differences_words(text1, text2):
97
  def calculate_similarity(text1, text2):
98
  if not text1.strip() or not text2.strip():
99
  return 0.0
100
-
101
  try:
102
  vectorizer = TfidfVectorizer(token_pattern=r'(?u)\b\w+\b')
103
  tfidf_matrix = vectorizer.fit_transform([text1, text2])
@@ -109,7 +110,7 @@ def calculate_similarity(text1, text2):
109
  def load_contract(file):
110
  if file is None:
111
  return ""
112
-
113
  ext = file.name.split('.')[-1].lower()
114
  try:
115
  if ext == 'txt':
@@ -152,36 +153,54 @@ def main():
152
  # ===== DOCUMENT UPLOAD SECTION =====
153
  st.header("1. Upload Documents")
154
  col1, col2 = st.columns(2)
155
-
156
  with col1:
157
  uploaded_file1 = st.file_uploader(
158
- "Upload First Document",
159
  type=["txt", "pdf", "docx"],
160
  key="file1"
161
  )
162
  contract_text1 = load_contract(uploaded_file1) if uploaded_file1 else ""
163
- doc1_display = st.empty()
164
-
165
  with col2:
166
  uploaded_file2 = st.file_uploader(
167
- "Upload Second Document",
168
  type=["txt", "pdf", "docx"],
169
  key="file2"
170
  )
171
  contract_text2 = load_contract(uploaded_file2) if uploaded_file2 else ""
172
- doc2_display = st.empty()
173
 
174
- # Update document displays
175
  if uploaded_file1:
176
- doc1_display.text_area("Document 1 Content",
177
- value=contract_text1,
178
- height=400, # Increased height for larger display
179
- key="area1")
180
  if uploaded_file2:
181
- doc2_display.text_area("Document 2 Content",
182
- value=contract_text2,
183
- height=400, # Increased height for larger display
184
- key="area2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  if not (uploaded_file1 and uploaded_file2):
187
  st.warning("Please upload both documents to proceed")
@@ -189,48 +208,72 @@ def main():
189
 
190
  # ===== DOCUMENT COMPARISON SECTION =====
191
  st.header("2. Document Comparison")
192
-
193
  with st.expander("Show Document Differences", expanded=True):
194
  if st.button("Compare Documents"):
195
  with st.spinner("Analyzing documents..."):
196
  if not contract_text1.strip() or not contract_text2.strip():
197
  st.error("One or both documents appear to be empty or couldn't be read properly")
198
  return
199
-
200
  similarity_score = calculate_similarity(contract_text1, contract_text2)
201
-
202
-
203
  highlighted_diff1, highlighted_diff2 = highlight_differences_words(contract_text1, contract_text2)
204
  st.session_state.comparison_results = {
205
  'similarity_score': similarity_score,
206
  'highlighted_diff1': highlighted_diff1,
207
  'highlighted_diff2': highlighted_diff2,
208
-
209
  }
210
-
211
 
212
  # Display comparison results
213
  if st.session_state.comparison_results:
214
- st.metric("Document Similarity Score",
215
- f"{st.session_state.comparison_results['similarity_score']:.2f}%")
216
-
217
- if st.session_state.comparison_results['similarity_score'] < 50:
218
  st.warning("Significant differences detected")
219
-
220
  st.markdown("**Visual Difference Highlighting:**")
221
-
222
- col1, col2 = st.columns(2)
223
- with col1:
224
  st.markdown("### Original Document")
225
- st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;">{st.session_state.comparison_results["highlighted_diff1"]}</div>', unsafe_allow_html=True)
226
- with col2:
 
227
  st.markdown("### Modified Document")
228
- st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;">{st.session_state.comparison_results["highlighted_diff2"]}</div>', unsafe_allow_html=True)
229
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  # ===== QUESTION ANALYSIS SECTION =====
232
  st.header("3. Clause Analysis")
233
-
234
  try:
235
  question_selected = st.selectbox(
236
  'Select a legal question to analyze:',
@@ -248,10 +291,10 @@ def main():
248
  if not (contract_text1.strip() and contract_text2.strip()):
249
  st.error("Please ensure both documents have readable content")
250
  return
251
-
252
- col1, col2 = st.columns(2)
253
-
254
- with col1:
255
  st.subheader("First Document Analysis")
256
  with st.spinner('Processing first document...'):
257
  try:
@@ -262,8 +305,8 @@ def main():
262
  except Exception as e:
263
  st.session_state.analysis_results = st.session_state.analysis_results or {}
264
  st.session_state.analysis_results['doc1'] = f"Analysis failed: {str(e)}"
265
-
266
- with col2:
267
  st.subheader("Second Document Analysis")
268
  with st.spinner('Processing second document...'):
269
  try:
@@ -277,14 +320,14 @@ def main():
277
 
278
  # Display analysis results
279
  if st.session_state.analysis_results:
280
- col1, col2 = st.columns(2)
281
- with col1:
282
  st.subheader("First Document Analysis")
283
  st.success(st.session_state.analysis_results.get('doc1', 'No analysis performed yet'))
284
-
285
- with col2:
286
  st.subheader("Second Document Analysis")
287
  st.success(st.session_state.analysis_results.get('doc2', 'No analysis performed yet'))
288
 
289
  if __name__ == "__main__":
290
- main()
 
7
  import difflib
8
  from sklearn.feature_extraction.text import TfidfVectorizer
9
  from sklearn.metrics.pairwise import cosine_similarity
10
+ import streamlit.components.v1 as components
11
 
12
  # ========== CONFIGURATION ==========
13
  st.set_page_config(
 
98
  def calculate_similarity(text1, text2):
99
  if not text1.strip() or not text2.strip():
100
  return 0.0
101
+
102
  try:
103
  vectorizer = TfidfVectorizer(token_pattern=r'(?u)\b\w+\b')
104
  tfidf_matrix = vectorizer.fit_transform([text1, text2])
 
110
  def load_contract(file):
111
  if file is None:
112
  return ""
113
+
114
  ext = file.name.split('.')[-1].lower()
115
  try:
116
  if ext == 'txt':
 
153
  # ===== DOCUMENT UPLOAD SECTION =====
154
  st.header("1. Upload Documents")
155
  col1, col2 = st.columns(2)
156
+
157
  with col1:
158
  uploaded_file1 = st.file_uploader(
159
+ "Upload First Document",
160
  type=["txt", "pdf", "docx"],
161
  key="file1"
162
  )
163
  contract_text1 = load_contract(uploaded_file1) if uploaded_file1 else ""
164
+ doc1_container = st.empty()
165
+
166
  with col2:
167
  uploaded_file2 = st.file_uploader(
168
+ "Upload Second Document",
169
  type=["txt", "pdf", "docx"],
170
  key="file2"
171
  )
172
  contract_text2 = load_contract(uploaded_file2) if uploaded_file2 else ""
173
+ doc2_container = st.empty()
174
 
175
+ # Update document displays with synchronized scrolling
176
  if uploaded_file1:
177
+ doc1_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; height: 400px; overflow-y: auto;" id="doc1_text">{contract_text1}</div>'
178
+ doc1_container.markdown(doc1_content, unsafe_allow_html=True)
 
 
179
  if uploaded_file2:
180
+ doc2_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; height: 400px; overflow-y: auto;" id="doc2_text">{contract_text2}</div>'
181
+ doc2_container.markdown(doc2_content, unsafe_allow_html=True)
182
+
183
+ # JavaScript for synchronized scrolling
184
+ scroll_script = """
185
+ <script>
186
+ function syncScroll(id, otherId) {
187
+ var element = document.getElementById(id);
188
+ var otherElement = document.getElementById(otherId);
189
+ if (element && otherElement) {
190
+ element.addEventListener('scroll', function() {
191
+ otherElement.scrollTop = element.scrollTop;
192
+ });
193
+ otherElement.addEventListener('scroll', function() {
194
+ element.scrollTop = otherElement.scrollTop;
195
+ });
196
+ }
197
+ }
198
+ window.onload = function() {
199
+ syncScroll('doc1_text', 'doc2_text');
200
+ };
201
+ </script>
202
+ """
203
+ components.html(scroll_script, height=0)
204
 
205
  if not (uploaded_file1 and uploaded_file2):
206
  st.warning("Please upload both documents to proceed")
 
208
 
209
  # ===== DOCUMENT COMPARISON SECTION =====
210
  st.header("2. Document Comparison")
211
+
212
  with st.expander("Show Document Differences", expanded=True):
213
  if st.button("Compare Documents"):
214
  with st.spinner("Analyzing documents..."):
215
  if not contract_text1.strip() or not contract_text2.strip():
216
  st.error("One or both documents appear to be empty or couldn't be read properly")
217
  return
218
+
219
  similarity_score = calculate_similarity(contract_text1, contract_text2)
220
+
221
+
222
  highlighted_diff1, highlighted_diff2 = highlight_differences_words(contract_text1, contract_text2)
223
  st.session_state.comparison_results = {
224
  'similarity_score': similarity_score,
225
  'highlighted_diff1': highlighted_diff1,
226
  'highlighted_diff2': highlighted_diff2,
227
+
228
  }
229
+
230
 
231
  # Display comparison results
232
  if st.session_state.comparison_results:
233
+ st.metric("Document Similarity Score",
234
+ f"{st.session_state.comparison_results['similarity_score']:.2f}%")
235
+
236
+ if st.session_state.comparison_results['similarity_score'] <= 70:
237
  st.warning("Significant differences detected")
238
+
239
  st.markdown("**Visual Difference Highlighting:**")
240
+
241
+ col1_diff, col2_diff = st.columns(2)
242
+ with col1_diff:
243
  st.markdown("### Original Document")
244
+ diff1_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;" id="diff1_text">{st.session_state.comparison_results["highlighted_diff1"]}</div>'
245
+ st.markdown(diff1_content, unsafe_allow_html=True)
246
+ with col2_diff:
247
  st.markdown("### Modified Document")
248
+ diff2_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;" id="diff2_text">{st.session_state.comparison_results["highlighted_diff2"]}</div>'
249
+ st.markdown(diff2_content, unsafe_allow_html=True)
250
+
251
+ # JavaScript for synchronized scrolling of diff panes
252
+ diff_scroll_script = """
253
+ <script>
254
+ function syncDiffScroll(id, otherId) {
255
+ var element = document.getElementById(id);
256
+ var otherElement = document.getElementById(otherId);
257
+ if (element && otherElement) {
258
+ element.addEventListener('scroll', function() {
259
+ otherElement.scrollTop = element.scrollTop;
260
+ });
261
+ otherElement.addEventListener('scroll', function() {
262
+ element.scrollTop = otherElement.scrollTop;
263
+ });
264
+ }
265
+ }
266
+ window.onload = function() {
267
+ syncDiffScroll('diff1_text', 'diff2_text');
268
+ };
269
+ </script>
270
+ """
271
+ components.html(diff_scroll_script, height=0)
272
+
273
 
274
  # ===== QUESTION ANALYSIS SECTION =====
275
  st.header("3. Clause Analysis")
276
+
277
  try:
278
  question_selected = st.selectbox(
279
  'Select a legal question to analyze:',
 
291
  if not (contract_text1.strip() and contract_text2.strip()):
292
  st.error("Please ensure both documents have readable content")
293
  return
294
+
295
+ col1_analysis, col2_analysis = st.columns(2)
296
+
297
+ with col1_analysis:
298
  st.subheader("First Document Analysis")
299
  with st.spinner('Processing first document...'):
300
  try:
 
305
  except Exception as e:
306
  st.session_state.analysis_results = st.session_state.analysis_results or {}
307
  st.session_state.analysis_results['doc1'] = f"Analysis failed: {str(e)}"
308
+
309
+ with col2_analysis:
310
  st.subheader("Second Document Analysis")
311
  with st.spinner('Processing second document...'):
312
  try:
 
320
 
321
  # Display analysis results
322
  if st.session_state.analysis_results:
323
+ col1_answer, col2_answer = st.columns(2)
324
+ with col1_answer:
325
  st.subheader("First Document Analysis")
326
  st.success(st.session_state.analysis_results.get('doc1', 'No analysis performed yet'))
327
+
328
+ with col2_answer:
329
  st.subheader("Second Document Analysis")
330
  st.success(st.session_state.analysis_results.get('doc2', 'No analysis performed yet'))
331
 
332
  if __name__ == "__main__":
333
+ main()