Spaces:

jarif
/

AI-Powered-PDF-Document-Search-and-QA

Sleeping

App Files Files Community

AI-Powered-PDF-Document-Search-and-QA / app.py

jarif

Upload app.py

e1f0f6b verified about 1 year ago

raw

history blame

3.78 kB

	import streamlit as st
	import os
	import faiss
	import logging
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain_community.llms import HuggingFacePipeline
	from langchain.chains import RetrievalQA
	from ingest import create_faiss_index

	# Set up logging: configure the root logger at INFO level and create the
	# module-level logger that the functions in this file use for diagnostics.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Model identifier passed to `from_pretrained` for both the tokenizer and
	# the seq2seq model in load_llm(). It carries no organisation prefix, so it
	# presumably resolves to a local directory next to the app -- TODO confirm.
	checkpoint = "LaMini-T5-738M"

	@st.cache_resource
	def load_llm():
	    """Build the text2text-generation pipeline and wrap it for LangChain.

	    The tokenizer and the seq2seq model are both loaded from the
	    module-level ``checkpoint``. The function is decorated with
	    ``st.cache_resource``.

	    Returns:
	        A ``HuggingFacePipeline`` wrapping the configured pipeline.
	    """
	    # Sampling configuration forwarded verbatim to the pipeline.
	    generation_settings = {
	        "max_length": 256,
	        "do_sample": True,
	        "temperature": 0.3,
	        "top_p": 0.95,
	    }

	    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

	    generator = pipeline(
	        'text2text-generation',
	        model=seq2seq_model,
	        tokenizer=text_tokenizer,
	        **generation_settings,
	    )
	    return HuggingFacePipeline(pipeline=generator)

	def validate_index_file(index_path):
	    """Check that the file at *index_path* can be opened and read.

	    Up to the first 100 bytes are read in binary mode; the number of bytes
	    actually read is logged at INFO level.

	    Args:
	        index_path: Path of the file to probe.

	    Returns:
	        ``True`` when the read succeeds, ``False`` when any exception is
	        raised (the exception is logged at ERROR level, not propagated).
	    """
	    readable = False
	    try:
	        with open(index_path, 'rb') as index_file:
	            header = index_file.read(100)
	        logger.info(f"Successfully read {len(header)} bytes from the index file")
	        readable = True
	    except Exception as err:
	        logger.error(f"Error validating index file: {err}")
	    return readable

	def load_faiss_index():
	    """Load the persisted FAISS vector store and return it as a retriever.

	    If ``faiss_index/index.faiss`` does not exist, ``create_faiss_index()``
	    is called once to build it. The store is then loaded through LangChain
	    using the ``sentence-transformers/all-MiniLM-L6-v2`` embeddings.

	    Returns:
	        The retriever produced by ``db.as_retriever()``.

	    Raises:
	        RuntimeError: If the index file still does not exist after the
	            rebuild attempt.
	        Exception: Any error raised while loading the store is shown with
	            ``st.error``, logged with its traceback, and re-raised unchanged.
	    """
	    index_path = "faiss_index/index.faiss"
	    if not os.path.exists(index_path):
	        st.warning("Index file not found. Creating a new one...")
	        create_faiss_index()

	        # Only re-check after a rebuild attempt; otherwise the file is known
	        # to exist already.
	        if not os.path.exists(index_path):
	            st.error("Failed to create the FAISS index. Please check the 'docs' directory and try again.")
	            raise RuntimeError("FAISS index creation failed.")

	    try:
	        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

	        # FAISS.load_local reads index.faiss itself, so the former separate
	        # faiss.read_index() pass only doubled the disk read on every call.
	        # Its `is None` check could never fire either, because read_index
	        # raises on failure instead of returning None.
	        # NOTE(review): load_local unpickles the docstore. That is acceptable
	        # only if the index is produced locally by create_faiss_index() --
	        # presumably the case, confirm. Recent langchain-community releases
	        # additionally require allow_dangerous_deserialization=True here;
	        # check the pinned version before upgrading.
	        db = FAISS.load_local("faiss_index", embeddings)
	        if db.index is None or db.index_to_docstore_id is None:
	            raise ValueError("FAISS index or docstore_id mapping is None.")

	        return db.as_retriever()
	    except Exception as e:
	        st.error(f"Failed to load FAISS index: {e}")
	        logger.exception("Exception in load_faiss_index")
	        raise

	def process_answer(instruction):
	    """Answer *instruction* with a retrieval "stuff" QA chain.

	    The retriever comes from ``load_faiss_index()`` and the language model
	    from ``load_llm()``; the chain is built with source documents enabled.

	    Args:
	        instruction: The question handed to the chain's ``invoke``.

	    Returns:
	        A ``(answer, response)`` tuple where ``response`` is the full chain
	        output and ``answer`` is ``response['result']``. If anything raises,
	        the error is shown via ``st.error``, logged with its traceback, and
	        a fixed fallback message plus an empty dict is returned instead.
	    """
	    try:
	        doc_retriever = load_faiss_index()
	        language_model = load_llm()

	        chain_options = {
	            "llm": language_model,
	            "chain_type": "stuff",
	            "retriever": doc_retriever,
	            "return_source_documents": True,
	        }
	        qa_chain = RetrievalQA.from_chain_type(**chain_options)

	        response = qa_chain.invoke(instruction)
	        return response['result'], response
	    except Exception as err:
	        st.error(f"An error occurred while processing the answer: {err}")
	        logger.exception("Exception in process_answer")
	        return "An error occurred while processing your request.", {}

	def main():
	    """Render the Streamlit page and answer the question the user submits.

	    Shows the title, an "About the App" expander, a question text area and
	    an "Ask" button. When the button is pressed with a non-blank question,
	    the question is echoed, ``process_answer`` is called, and both the
	    answer and the full chain output are written to the page. A blank
	    question produces a warning instead of running the QA chain.
	    """
	    st.title("Search Your PDF 📚📝")

	    with st.expander("About the App"):
	        st.markdown(
	            """
	This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
	"""
	        )

	    question = st.text_area("Enter your Question")

	    if not st.button("Ask"):
	        return

	    # Guard against an empty or whitespace-only question: running retrieval
	    # and generation on it is slow and cannot produce a meaningful answer.
	    if not question or not question.strip():
	        st.warning("Please enter a question before clicking Ask.")
	        return

	    st.info("Your Question: " + question)
	    st.info("Your Answer")
	    try:
	        answer, metadata = process_answer(question)
	        st.write(answer)
	        st.write(metadata)
	    except Exception as e:
	        # Top-level UI boundary: surface the problem on the page and keep
	        # the traceback in the log.
	        st.error(f"An unexpected error occurred: {e}")
	        logger.exception("Unexpected error in main function")

	# Entry point: build the page only when this file is run as the main script.
	if __name__ == '__main__':
	    main()