DebabrataHalder committed on
Commit
452b169
·
verified ·
1 Parent(s): 49cf98b

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -130
app.py DELETED
@@ -1,130 +0,0 @@
1
- import os
2
- import logging
3
- import time
4
- from dotenv import load_dotenv
5
- import streamlit as st
6
- from PyPDF2 import PdfReader
7
- from langchain.text_splitter import CharacterTextSplitter
8
- from langchain_cohere import CohereEmbeddings
9
- from langchain.vectorstores import FAISS
10
- from langchain.memory import ConversationBufferMemory
11
- from langchain.chains import ConversationalRetrievalChain
12
- from langchain_groq import ChatGroq
13
-
14
# Load environment variables from a local .env file (e.g. COHERE_API_KEY,
# which get_vectorstore() reads via os.getenv below).
load_dotenv()

# Set up logging: timestamped, INFO-level messages for the whole app.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
22
-
23
# Function to extract text from PDF files
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: Iterable of file-like objects accepted by ``PyPDF2.PdfReader``
            (Streamlit's uploaded-file objects in this app).

    Returns:
        A single string containing all page text, in page order.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable
            # text (e.g. scanned/image-only pages); guard so the string
            # concatenation never raises TypeError.
            text += page.extract_text() or ""
    return text
31
-
32
# Function to split the extracted text into chunks
def get_text_chunks(text):
    """Split *text* into overlapping chunks sized for embedding.

    Chunks are at most 1000 characters, split on newlines, with a
    200-character overlap so context is not lost at chunk boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
42
-
43
# Function to create a FAISS vectorstore with rate-limiting and retry logic
def get_vectorstore(text_chunks):
    """Embed text chunks into a FAISS vectorstore, batching with retries.

    Cohere's embedding API is rate-limited, so chunks are embedded in small
    batches and rate-limit failures are retried with exponential backoff.

    Args:
        text_chunks: List of strings to embed.

    Returns:
        The populated FAISS vectorstore, or None if *text_chunks* is empty.

    Raises:
        RuntimeError: If a batch still hits the rate limit after all retries.
        Exception: Any non-rate-limit error from the embedding backend.
    """
    cohere_api_key = os.getenv("COHERE_API_KEY")
    embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)

    vectorstore = None
    batch_size = 10  # small requests keep each call under the rate limit
    max_retries = 5
    for i in range(0, len(text_chunks), batch_size):
        batch = text_chunks[i:i + batch_size]
        for retry_count in range(max_retries):
            try:
                if vectorstore is None:
                    vectorstore = FAISS.from_texts(texts=batch, embedding=embeddings)
                else:
                    # Fix: FAISS.add_texts takes no `embedding` kwarg — the
                    # store already holds its embedding function from
                    # from_texts(); the old kwarg was bogus.
                    vectorstore.add_texts(batch)
                break  # batch succeeded; move on to the next one
            except Exception as e:
                if "rate limit" in str(e).lower():
                    wait = 2 ** retry_count  # exponential backoff
                    logging.warning(f"Rate limit exceeded. Retrying batch {i//batch_size + 1} in {wait} seconds...")
                    time.sleep(wait)
                else:
                    raise  # propagate non-rate-limit errors unchanged
        else:
            # Fix: the original silently dropped a batch after exhausting its
            # retries, producing a silently incomplete index. Fail loudly.
            raise RuntimeError(
                f"Batch {i//batch_size + 1} still rate-limited after {max_retries} retries."
            )
    return vectorstore
69
-
70
# Function to set up the conversational retrieval chain
def get_conversation_chain(vectorstore):
    """Build a ConversationalRetrievalChain over *vectorstore*.

    Wires a Groq-hosted Llama model to the vectorstore's retriever with a
    buffer memory keyed "chat_history". Returns the chain, or None (after
    surfacing a Streamlit error) if construction fails.
    """
    try:
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            memory=memory,
        )
        logging.info("Conversation chain created successfully.")
        return chain
    except Exception as e:
        logging.error(f"Error creating conversation chain: {e}")
        st.error("An error occurred while setting up the conversation chain.")
        return None
87
-
88
# Handle user input
def handle_userinput(user_question):
    """Send *user_question* through the active chain and render the chat.

    If no chain has been set up yet (documents not processed), shows a
    warning instead.
    """
    if st.session_state.conversation is None:
        st.warning("Please process the documents first.")
        return

    response = st.session_state.conversation({"question": user_question})
    st.session_state.chat_history = response["chat_history"]

    # Messages alternate: even indices are the user, odd indices the bot.
    for idx, message in enumerate(st.session_state.chat_history):
        if idx % 2 == 0:
            st.write(f"*User:* {message.content}")
        else:
            st.write(f"*Bot:* {message.content}")
101
-
102
# Main function to run the Streamlit app
def main():
    """Streamlit entry point: page setup, chat input, and sidebar ingestion.

    Uploaded PDFs are read, chunked, embedded into a FAISS store, and wired
    into a conversational chain kept in ``st.session_state.conversation``.
    """
    # NOTE: load_dotenv() already runs at module import time; the original
    # second call here was redundant and has been dropped.
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")

    # Initialize per-session state on first run.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
        )
        if st.button("Process"):
            # Fix: clicking Process with no files would otherwise push an
            # empty corpus through the embedding/vectorstore pipeline.
            if not pdf_docs:
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    vectorstore = get_vectorstore(text_chunks)
                    st.session_state.conversation = get_conversation_chain(vectorstore)
128
-
129
- if __name__ == "__main__":
130
- main()