ZeeAI1 committed on
Commit b07617d · verified · 1 Parent(s): 8513d65

Create apps.py

Files changed (1)
  1. apps.py +159 -0
apps.py ADDED
@@ -0,0 +1,159 @@
+ import os
+ import streamlit as st
+ import pdfplumber
+ from concurrent.futures import ThreadPoolExecutor
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from transformers import pipeline
+
+ # Set up the page configuration
+ st.set_page_config(page_title="RAG-based PDF Chat", layout="centered", page_icon="📄")
+
+ # Load the summarization pipeline model
+ @st.cache_resource
+ def load_summarization_pipeline():
+     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+     return summarizer
+
+ summarizer = load_summarization_pipeline()
+
+ # Load the translation pipeline model
+ @st.cache_resource
+ def load_translation_pipeline(target_lang):
+     translation_model = f"Helsinki-NLP/opus-mt-en-{target_lang}"
+     translator = pipeline("translation", model=translation_model)
+     return translator
+
+ # Split text into manageable chunks
+ @st.cache_data
+ def get_text_chunks(text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+     chunks = text_splitter.split_text(text)
+     return chunks
+
+ # Initialize embedding function
+ embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+ # Create a FAISS vector store with embeddings, checking for empty chunks
+ @st.cache_resource
+ def load_or_create_vector_store(text_chunks):
+     if not text_chunks:
+         st.error("No valid text chunks found to create a vector store. Please check your PDF files.")
+         return None
+     vector_store = FAISS.from_texts(text_chunks, embedding=embedding_function)
+     return vector_store
+
+ # Helper function to process a single PDF
+ def process_single_pdf(file_path):
+     text = ""
+     try:
+         with pdfplumber.open(file_path) as pdf:
+             for page in pdf.pages:
+                 page_text = page.extract_text()
+                 if page_text:
+                     text += page_text
+     except Exception as e:
+         st.error(f"Failed to read PDF: {file_path} - {e}")
+     return text
+
+ # Function to load PDFs with progress display
+ def load_pdfs_with_progress(folder_path):
+     all_text = ""
+     pdf_files = [os.path.join(folder_path, filename) for filename in os.listdir(folder_path) if filename.endswith('.pdf')]
+     num_files = len(pdf_files)
+
+     if num_files == 0:
+         st.error("No PDF files found in the specified folder.")
+         st.session_state['vector_store'] = None
+         st.session_state['loading'] = False
+         return
+
+     # Title for the progress bar
+     st.markdown("### Loading data...")
+     progress_bar = st.progress(0)
+     status_text = st.empty()
+
+     processed_count = 0
+
+     for file_path in pdf_files:
+         result = process_single_pdf(file_path)
+         all_text += result
+         processed_count += 1
+         progress_percentage = int((processed_count / num_files) * 100)
+         progress_bar.progress(processed_count / num_files)
+         status_text.text(f"Loading documents: {progress_percentage}% completed")
+
+     progress_bar.empty()  # Remove the progress bar when done
+     status_text.text("Document loading completed!")  # Show completion message
+
+     if all_text:
+         text_chunks = get_text_chunks(all_text)
+         vector_store = load_or_create_vector_store(text_chunks)
+         st.session_state['vector_store'] = vector_store
+     else:
+         st.session_state['vector_store'] = None
+
+     st.session_state['loading'] = False  # Mark loading as complete
+
+ # Generate summary based on the retrieved text
+ def generate_summary_with_huggingface(query, retrieved_text):
+     summarization_input = f"{query} Related information: {retrieved_text}"
+     max_input_length = 1024  # truncate by characters to keep the input within the model's limit
+     summarization_input = summarization_input[:max_input_length]
+     summary = summarizer(summarization_input, max_length=500, min_length=50, do_sample=False)
+     return summary[0]["summary_text"]
+
+ # Translate the summary if a non-English language is selected
+ def translate_summary(summary, target_lang):
+     if target_lang == "en":
+         return summary
+     translator = load_translation_pipeline(target_lang)
+     translated_summary = translator(summary, max_length=500)[0]["translation_text"]
+     return translated_summary
+
+ # Generate response for user query
+ def user_input(user_question, target_lang):
+     vector_store = st.session_state.get('vector_store')
+     if vector_store is None:
+         return "The app is still loading documents or no documents were successfully loaded."
+     docs = vector_store.similarity_search(user_question)
+     context_text = " ".join([doc.page_content for doc in docs])
+     summary = generate_summary_with_huggingface(user_question, context_text)
+     return translate_summary(summary, target_lang)
+
+ # Main function to run the Streamlit app
+ def main():
+     # Use HTML to style the title with a larger font size
+     st.markdown(
+         """
+         <h1 style="font-size:30px; text-align: center;">
+         📄 JusticeCompass: Your AI-Powered Legal Navigator for Swift, Accurate Guidance.
+         </h1>
+         """,
+         unsafe_allow_html=True
+     )
+
+     # Start loading documents if not already loaded
+     if 'loading' not in st.session_state or st.session_state['loading']:
+         st.session_state['loading'] = True
+         load_pdfs_with_progress('documents1')
+
+     user_question = st.text_input("Ask a Question:", placeholder="Type your question here...")
+
+     # Language selection
+     target_lang = st.selectbox("Select Output Language:", options=["en", "ur", "es", "zh"], format_func=lambda lang: {"en": "English", "ur": "Urdu", "es": "Spanish", "zh": "Chinese"}[lang])
+
+     if st.session_state.get('loading', True):
+         st.info("The app is loading documents in the background. You can type your question now and submit once loading is complete.")
+
+     if st.button("Get Response"):
+         if not user_question:
+             st.warning("Please enter a question before submitting.")
+         else:
+             with st.spinner("Generating response..."):
+                 answer = user_input(user_question, target_lang)
+                 st.markdown(f"**🤖 AI:** {answer}")
+
+ if __name__ == "__main__":
+     main()
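
For reference, below is a minimal sketch of the retrieval-and-summarization flow this file performs, run outside Streamlit. It assumes the same legacy langchain import paths used in apps.py (recent releases expose these classes under langchain_community), and the sample document text and question are placeholders rather than real data.

# Minimal sketch of the app's retrieval + summarization flow, outside Streamlit.
# Assumes the same (legacy) langchain import paths as apps.py; the document text
# and question below are placeholders, not real inputs.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline

# 1. Split the extracted document text into overlapping chunks (same splitter settings as the app).
document_text = "Example text extracted from a PDF ..."  # placeholder
splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
chunks = splitter.split_text(document_text)

# 2. Embed the chunks and index them in an in-memory FAISS store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_texts(chunks, embedding=embeddings)

# 3. Retrieve the chunks most similar to the question (top 4 by default).
question = "What notice period does the document require?"  # placeholder
docs = vector_store.similarity_search(question)
context = " ".join(doc.page_content for doc in docs)

# 4. Summarize question + retrieved context with BART, truncating by characters as the app does.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
result = summarizer(f"{question} Related information: {context}"[:1024],
                    max_length=500, min_length=50, do_sample=False)
print(result[0]["summary_text"])

To run the app itself, the PDFs are expected in a documents1 folder next to apps.py, and the app is started with: streamlit run apps.py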