Spaces:

ludigija
/

crosscheck

Running

App Files Community

ludigija committed on Apr 14

Commit

4665d41

verified ·

1 Parent(s): 446457d

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -47

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import pdfplumber
 import difflib
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 # ========== CONFIGURATION ==========
 st.set_page_config(
@@ -97,7 +98,7 @@ def highlight_differences_words(text1, text2):
 def calculate_similarity(text1, text2):
     if not text1.strip() or not text2.strip():
         return 0.0
     try:
         vectorizer = TfidfVectorizer(token_pattern=r'(?u)\b\w+\b')
         tfidf_matrix = vectorizer.fit_transform([text1, text2])
@@ -109,7 +110,7 @@ def calculate_similarity(text1, text2):
 def load_contract(file):
     if file is None:
         return ""
     ext = file.name.split('.')[-1].lower()
     try:
         if ext == 'txt':
@@ -152,36 +153,54 @@ def main():
     # ===== DOCUMENT UPLOAD SECTION =====
     st.header("1. Upload Documents")
     col1, col2 = st.columns(2)
     with col1:
         uploaded_file1 = st.file_uploader(
-            "Upload First Document",
             type=["txt", "pdf", "docx"],
             key="file1"
         )
         contract_text1 = load_contract(uploaded_file1) if uploaded_file1 else ""
-        doc1_display = st.empty()
     with col2:
         uploaded_file2 = st.file_uploader(
-            "Upload Second Document",
             type=["txt", "pdf", "docx"],
             key="file2"
         )
         contract_text2 = load_contract(uploaded_file2) if uploaded_file2 else ""
-        doc2_display = st.empty()
-    # Update document displays
     if uploaded_file1:
-        doc1_display.text_area("Document 1 Content",
-                            value=contract_text1,
-                            height=400,  # Increased height for larger display
-                            key="area1")
     if uploaded_file2:
-        doc2_display.text_area("Document 2 Content",
-                            value=contract_text2,
-                            height=400, # Increased height for larger display
-                            key="area2")
     if not (uploaded_file1 and uploaded_file2):
         st.warning("Please upload both documents to proceed")
@@ -189,48 +208,72 @@ def main():
     # ===== DOCUMENT COMPARISON SECTION =====
     st.header("2. Document Comparison")
     with st.expander("Show Document Differences", expanded=True):
         if st.button("Compare Documents"):
             with st.spinner("Analyzing documents..."):
                 if not contract_text1.strip() or not contract_text2.strip():
                     st.error("One or both documents appear to be empty or couldn't be read properly")
                     return
                 similarity_score = calculate_similarity(contract_text1, contract_text2)
                 highlighted_diff1, highlighted_diff2 = highlight_differences_words(contract_text1, contract_text2)
                 st.session_state.comparison_results = {
                     'similarity_score': similarity_score,
                     'highlighted_diff1': highlighted_diff1,
                     'highlighted_diff2': highlighted_diff2,
                 }
         # Display comparison results
         if st.session_state.comparison_results:
-            st.metric("Document Similarity Score",
-                      f"{st.session_state.comparison_results['similarity_score']:.2f}%")
-            if st.session_state.comparison_results['similarity_score'] < 50:
                 st.warning("Significant differences detected")
             st.markdown("**Visual Difference Highlighting:**")
-            col1, col2 = st.columns(2)
-            with col1:
                 st.markdown("### Original Document")
-                st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;">{st.session_state.comparison_results["highlighted_diff1"]}</div>', unsafe_allow_html=True)
-            with col2:
                 st.markdown("### Modified Document")
-                st.markdown(f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;">{st.session_state.comparison_results["highlighted_diff2"]}</div>', unsafe_allow_html=True)
     # ===== QUESTION ANALYSIS SECTION =====
     st.header("3. Clause Analysis")
     try:
         question_selected = st.selectbox(
             'Select a legal question to analyze:',
@@ -248,10 +291,10 @@ def main():
         if not (contract_text1.strip() and contract_text2.strip()):
             st.error("Please ensure both documents have readable content")
             return
-        col1, col2 = st.columns(2)
-        with col1:
             st.subheader("First Document Analysis")
             with st.spinner('Processing first document...'):
                 try:
@@ -262,8 +305,8 @@ def main():
                 except Exception as e:
                     st.session_state.analysis_results = st.session_state.analysis_results or {}
                     st.session_state.analysis_results['doc1'] = f"Analysis failed: {str(e)}"
-        with col2:
             st.subheader("Second Document Analysis")
             with st.spinner('Processing second document...'):
                 try:
@@ -277,14 +320,14 @@ def main():
     # Display analysis results
     if st.session_state.analysis_results:
-        col1, col2 = st.columns(2)
-        with col1:
             st.subheader("First Document Analysis")
             st.success(st.session_state.analysis_results.get('doc1', 'No analysis performed yet'))
-        with col2:
             st.subheader("Second Document Analysis")
             st.success(st.session_state.analysis_results.get('doc2', 'No analysis performed yet'))
 if __name__ == "__main__":
-    main()

 import difflib
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+import streamlit.components.v1 as components
 # ========== CONFIGURATION ==========
 st.set_page_config(
 def calculate_similarity(text1, text2):
     if not text1.strip() or not text2.strip():
         return 0.0
     try:
         vectorizer = TfidfVectorizer(token_pattern=r'(?u)\b\w+\b')
         tfidf_matrix = vectorizer.fit_transform([text1, text2])
 def load_contract(file):
     if file is None:
         return ""
     ext = file.name.split('.')[-1].lower()
     try:
         if ext == 'txt':
     # ===== DOCUMENT UPLOAD SECTION =====
     st.header("1. Upload Documents")
     col1, col2 = st.columns(2)
     with col1:
         uploaded_file1 = st.file_uploader(
+            "Upload First Document",
             type=["txt", "pdf", "docx"],
             key="file1"
         )
         contract_text1 = load_contract(uploaded_file1) if uploaded_file1 else ""
+        doc1_container = st.empty()
     with col2:
         uploaded_file2 = st.file_uploader(
+            "Upload Second Document",
             type=["txt", "pdf", "docx"],
             key="file2"
         )
         contract_text2 = load_contract(uploaded_file2) if uploaded_file2 else ""
+        doc2_container = st.empty()
+    # Update document displays with synchronized scrolling
     if uploaded_file1:
+        doc1_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; height: 400px; overflow-y: auto;" id="doc1_text">{contract_text1}</div>'
+        doc1_container.markdown(doc1_content, unsafe_allow_html=True)
     if uploaded_file2:
+        doc2_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; height: 400px; overflow-y: auto;" id="doc2_text">{contract_text2}</div>'
+        doc2_container.markdown(doc2_content, unsafe_allow_html=True)
+    # JavaScript for synchronized scrolling
+    scroll_script = """
+    <script>
+    function syncScroll(id, otherId) {
+        var element = document.getElementById(id);
+        var otherElement = document.getElementById(otherId);
+        if (element && otherElement) {
+            element.addEventListener('scroll', function() {
+                otherElement.scrollTop = element.scrollTop;
+            });
+            otherElement.addEventListener('scroll', function() {
+                element.scrollTop = otherElement.scrollTop;
+            });
+        }
+    }
+    window.onload = function() {
+        syncScroll('doc1_text', 'doc2_text');
+    };
+    </script>
+    """
+    components.html(scroll_script, height=0)
     if not (uploaded_file1 and uploaded_file2):
         st.warning("Please upload both documents to proceed")
     # ===== DOCUMENT COMPARISON SECTION =====
     st.header("2. Document Comparison")
     with st.expander("Show Document Differences", expanded=True):
         if st.button("Compare Documents"):
             with st.spinner("Analyzing documents..."):
                 if not contract_text1.strip() or not contract_text2.strip():
                     st.error("One or both documents appear to be empty or couldn't be read properly")
                     return
                 similarity_score = calculate_similarity(contract_text1, contract_text2)
                 highlighted_diff1, highlighted_diff2 = highlight_differences_words(contract_text1, contract_text2)
                 st.session_state.comparison_results = {
                     'similarity_score': similarity_score,
                     'highlighted_diff1': highlighted_diff1,
                     'highlighted_diff2': highlighted_diff2,
                 }
         # Display comparison results
         if st.session_state.comparison_results:
+            st.metric("Document Similarity Score",
+                        f"{st.session_state.comparison_results['similarity_score']:.2f}%")
+            if st.session_state.comparison_results['similarity_score'] <= 70:
                 st.warning("Significant differences detected")
             st.markdown("**Visual Difference Highlighting:**")
+            col1_diff, col2_diff = st.columns(2)
+            with col1_diff:
                 st.markdown("### Original Document")
+                diff1_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;" id="diff1_text">{st.session_state.comparison_results["highlighted_diff1"]}</div>'
+                st.markdown(diff1_content, unsafe_allow_html=True)
+            with col2_diff:
                 st.markdown("### Modified Document")
+                diff2_content = f'<div style="border:1px solid #ccc; padding:10px; white-space: pre-wrap; font-family: monospace; font-size: 0.9em; max-height: 500px; overflow-y: auto;" id="diff2_text">{st.session_state.comparison_results["highlighted_diff2"]}</div>'
+                st.markdown(diff2_content, unsafe_allow_html=True)
+            # JavaScript for synchronized scrolling of diff panes
+            diff_scroll_script = """
+            <script>
+            function syncDiffScroll(id, otherId) {
+                var element = document.getElementById(id);
+                var otherElement = document.getElementById(otherId);
+                if (element && otherElement) {
+                    element.addEventListener('scroll', function() {
+                        otherElement.scrollTop = element.scrollTop;
+                    });
+                    otherElement.addEventListener('scroll', function() {
+                        element.scrollTop = otherElement.scrollTop;
+                    });
+                }
+            }
+            window.onload = function() {
+                syncDiffScroll('diff1_text', 'diff2_text');
+            };
+            </script>
+            """
+            components.html(diff_scroll_script, height=0)
     # ===== QUESTION ANALYSIS SECTION =====
     st.header("3. Clause Analysis")
     try:
         question_selected = st.selectbox(
             'Select a legal question to analyze:',
         if not (contract_text1.strip() and contract_text2.strip()):
             st.error("Please ensure both documents have readable content")
             return
+        col1_analysis, col2_analysis = st.columns(2)
+        with col1_analysis:
             st.subheader("First Document Analysis")
             with st.spinner('Processing first document...'):
                 try:
                 except Exception as e:
                     st.session_state.analysis_results = st.session_state.analysis_results or {}
                     st.session_state.analysis_results['doc1'] = f"Analysis failed: {str(e)}"
+        with col2_analysis:
             st.subheader("Second Document Analysis")
             with st.spinner('Processing second document...'):
                 try:
     # Display analysis results
     if st.session_state.analysis_results:
+        col1_answer, col2_answer = st.columns(2)
+        with col1_answer:
             st.subheader("First Document Analysis")
             st.success(st.session_state.analysis_results.get('doc1', 'No analysis performed yet'))
+        with col2_answer:
             st.subheader("Second Document Analysis")
             st.success(st.session_state.analysis_results.get('doc2', 'No analysis performed yet'))
 if __name__ == "__main__":
+    main()