Update app.py
Browse files
app.py
CHANGED
@@ -1,21 +1,23 @@
|
|
1 |
-
import streamlit as st
|
2 |
import os
|
3 |
import logging
|
4 |
import faiss
|
5 |
-
import
|
6 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
7 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
8 |
-
from langchain.chains import RetrievalQA
|
9 |
from langchain.vectorstores import FAISS
|
|
|
|
|
10 |
|
11 |
# Set up logging
|
12 |
logging.basicConfig(level=logging.INFO)
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
|
|
15 |
checkpoint = "LaMini-T5-738M"
|
16 |
|
17 |
@st.cache_resource
|
18 |
def load_llm():
|
|
|
19 |
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
20 |
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
|
21 |
pipe = pipeline(
|
@@ -30,16 +32,17 @@ def load_llm():
|
|
30 |
return HuggingFacePipeline(pipeline=pipe)
|
31 |
|
32 |
def load_faiss_index():
|
|
|
33 |
index_path = "faiss_index/index.faiss"
|
34 |
if not os.path.exists(index_path):
|
35 |
st.error(f"FAISS index not found at {index_path}. Please ensure the file exists.")
|
36 |
raise RuntimeError(f"FAISS index not found at {index_path}.")
|
37 |
-
|
38 |
try:
|
39 |
-
|
40 |
logger.info(f"FAISS index loaded successfully from {index_path}")
|
41 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
42 |
-
retriever = FAISS(
|
43 |
return retriever
|
44 |
except Exception as e:
|
45 |
st.error(f"Failed to load FAISS index: {e}")
|
@@ -47,6 +50,7 @@ def load_faiss_index():
|
|
47 |
raise
|
48 |
|
49 |
def process_answer(instruction):
|
|
|
50 |
try:
|
51 |
retriever = load_faiss_index()
|
52 |
llm = load_llm()
|
@@ -65,6 +69,7 @@ def process_answer(instruction):
|
|
65 |
return "An error occurred while processing your request.", {}
|
66 |
|
67 |
def main():
|
|
|
68 |
st.title("Search Your PDF ππ")
|
69 |
|
70 |
with st.expander("About the App"):
|
|
|
|
|
1 |
import os
|
2 |
import logging
|
3 |
import faiss
|
4 |
+
import streamlit as st
|
5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
6 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
7 |
from langchain.vectorstores import FAISS
|
8 |
+
from langchain_community.llms import HuggingFacePipeline
|
9 |
+
from langchain.chains import RetrievalQA
|
10 |
|
11 |
# Set up logging
|
12 |
logging.basicConfig(level=logging.INFO)
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
+
# HuggingFace model checkpoint
|
16 |
checkpoint = "LaMini-T5-738M"
|
17 |
|
18 |
@st.cache_resource
|
19 |
def load_llm():
|
20 |
+
"""Load the language model for text generation."""
|
21 |
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
22 |
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
|
23 |
pipe = pipeline(
|
|
|
32 |
return HuggingFacePipeline(pipeline=pipe)
|
33 |
|
34 |
def load_faiss_index():
|
35 |
+
"""Load the FAISS index for vector search."""
|
36 |
index_path = "faiss_index/index.faiss"
|
37 |
if not os.path.exists(index_path):
|
38 |
st.error(f"FAISS index not found at {index_path}. Please ensure the file exists.")
|
39 |
raise RuntimeError(f"FAISS index not found at {index_path}.")
|
40 |
+
|
41 |
try:
|
42 |
+
index = faiss.read_index(index_path)
|
43 |
logger.info(f"FAISS index loaded successfully from {index_path}")
|
44 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
45 |
+
retriever = FAISS(index, embeddings)
|
46 |
return retriever
|
47 |
except Exception as e:
|
48 |
st.error(f"Failed to load FAISS index: {e}")
|
|
|
50 |
raise
|
51 |
|
52 |
def process_answer(instruction):
|
53 |
+
"""Process the user's question using the QA system."""
|
54 |
try:
|
55 |
retriever = load_faiss_index()
|
56 |
llm = load_llm()
|
|
|
69 |
return "An error occurred while processing your request.", {}
|
70 |
|
71 |
def main():
|
72 |
+
"""Main function to run the Streamlit application."""
|
73 |
st.title("Search Your PDF ππ")
|
74 |
|
75 |
with st.expander("About the App"):
|