Job-Interview / knowledge_retrieval.py
ruslanmv's picture
First commit
5798cfc
import os
import fitz # PyMuPDF for PDF handling
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import Document, StrOutputParser
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA
from langchain.chains.llm import LLMChain
from langchain_core.runnables import RunnablePassthrough
from prompt_instructions import get_interview_prompt_hr, get_report_prompt_hr
# Function to load documents based on file type
def load_document(file_path):
ext = os.path.splitext(file_path)[1].lower()
if ext == ".txt":
with open(file_path, "r", encoding="utf-8") as f:
text = f.read()
return [Document(page_content=text, metadata={"source": file_path})]
elif ext == ".pdf":
try:
with fitz.open(file_path) as pdf:
text = ""
for page in pdf:
text += page.get_text()
return [Document(page_content=text, metadata={"source": file_path})]
except Exception as e:
raise RuntimeError(f"Error loading PDF file: {e}")
else:
raise RuntimeError(f"Unsupported file format: {ext}")
# Function to set up knowledge retrieval
def setup_knowledge_retrieval(llm, language='english', file_path=None):
embedding_model = OpenAIEmbeddings()
if file_path:
# Load and split the document
documents = load_document(file_path)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)
# Create a new FAISS index from the document
faiss_index_path = "knowledge/faiss_index_hr_documents"
try:
documents_faiss_index = FAISS.from_documents(texts, embedding_model)
documents_faiss_index.save_local(faiss_index_path)
print(f"New FAISS vector store created and saved at {faiss_index_path}")
except Exception as e:
raise RuntimeError(f"Error during FAISS index creation: {e}")
else:
raise RuntimeError("No document provided for knowledge retrieval setup.")
documents_retriever = documents_faiss_index.as_retriever()
# Prompt template for the interview
interview_prompt_template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
interview_prompt = PromptTemplate.from_template(interview_prompt_template)
# Prompt template for the report
report_prompt_template = """
Use the following pieces of context to generate a report at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
report_prompt = PromptTemplate.from_template(report_prompt_template)
# Create RetrievalQA chains
interview_chain = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=documents_retriever,
chain_type_kwargs={"prompt": interview_prompt}
)
report_chain = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=documents_retriever,
chain_type_kwargs={"prompt": report_prompt}
)
return interview_chain, report_chain, documents_retriever
def get_next_response(interview_chain, message, history, question_count):
if question_count >= 5:
return "Thank you for your responses. I will now prepare a report."
if not interview_chain:
return "Error: Knowledge base not loaded. Please contact an admin."
# Generate the next question using RetrievalQA
response = interview_chain.invoke({"query": message})
next_question = response.get("result", "Could you provide more details on that?")
return next_question
def generate_report(report_chain, history, language):
combined_history = "\n".join(history)
# If report_chain is not available, return a fallback report
if not report_chain:
print("[DEBUG] Report chain not available. Generating a fallback HR report.")
fallback_report = f"""
HR Report in {language}:
Interview Summary:
{combined_history}
Assessment:
Based on the responses, the candidate's strengths, areas for improvement, and overall fit for the role have been noted. No additional knowledge-based insights due to missing vector database.
"""
return fallback_report
# Generate report using the retrieval chain
result = report_chain.invoke({"query": f"Please provide an HR report based on the interview in {language}. Interview history: {combined_history}"})
return result.get("result", "Unable to generate report due to insufficient information.")
def get_initial_question(interview_chain):
if not interview_chain:
return "Please introduce yourself and tell me a little bit about your professional background."
result = interview_chain.invoke({"query": "What should be the first question in an HR interview?"})
return result.get("result", "Could you tell me a little bit about yourself and your professional background?")