# HR interview assistant (Hugging Face Spaces app).
# NOTE: the original capture carried the Spaces page banner ("Spaces: Running")
# here — it is page chrome, not part of the program.
import os | |
import fitz # PyMuPDF for PDF handling | |
from langchain_community.vectorstores import FAISS | |
from langchain_openai import OpenAIEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.prompts import ChatPromptTemplate, PromptTemplate | |
from langchain.schema import Document, StrOutputParser | |
from langchain.chains.combine_documents.stuff import StuffDocumentsChain | |
from langchain.chains import RetrievalQA | |
from langchain.chains.llm import LLMChain | |
from langchain_core.runnables import RunnablePassthrough | |
from prompt_instructions import get_interview_prompt_hr, get_report_prompt_hr | |
def load_document(file_path):
    """Load a .txt or .pdf file into a single LangChain Document.

    Args:
        file_path: Path to the file on disk.

    Returns:
        A one-element list holding a Document whose ``page_content`` is the
        full text of the file and whose metadata records the source path.

    Raises:
        RuntimeError: If the PDF cannot be opened/read, or the extension is
            neither ``.txt`` nor ``.pdf``.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
        return [Document(page_content=text, metadata={"source": file_path})]
    elif ext == ".pdf":
        try:
            with fitz.open(file_path) as pdf:
                # Single-pass join instead of quadratic `text += ...`.
                text = "".join(page.get_text() for page in pdf)
            return [Document(page_content=text, metadata={"source": file_path})]
        except Exception as e:
            raise RuntimeError(f"Error loading PDF file: {e}")
    else:
        raise RuntimeError(f"Unsupported file format: {ext}")
def setup_knowledge_retrieval(llm, language='english', file_path=None):
    """Build FAISS-backed RetrievalQA chains for the interview and the report.

    Loads the document at *file_path*, splits it into chunks, embeds them into
    a FAISS index saved under ``knowledge/faiss_index_hr_documents``, and wraps
    the resulting retriever in two "stuff" RetrievalQA chains.

    Args:
        llm: The chat/completion model used by both QA chains.
        language: Kept for API compatibility; not used in this function.
        file_path: Path to the knowledge document (.txt or .pdf). Required.

    Returns:
        Tuple of (interview_chain, report_chain, documents_retriever).

    Raises:
        RuntimeError: If no file is given or FAISS index creation fails.
    """
    # Guard clause: without a document there is nothing to index.
    if not file_path:
        raise RuntimeError("No document provided for knowledge retrieval setup.")

    embedding_model = OpenAIEmbeddings()

    # Load and split the document into retrieval-sized chunks.
    documents = load_document(file_path)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)

    # Create a new FAISS index from the document and persist it.
    faiss_index_path = "knowledge/faiss_index_hr_documents"
    try:
        documents_faiss_index = FAISS.from_documents(texts, embedding_model)
        documents_faiss_index.save_local(faiss_index_path)
        print(f"New FAISS vector store created and saved at {faiss_index_path}")
    except Exception as e:
        raise RuntimeError(f"Error during FAISS index creation: {e}")

    documents_retriever = documents_faiss_index.as_retriever()

    # The interview and report prompts differ only in the task phrase, so
    # build both from one shared template ({context}/{question} are left for
    # the chain to fill in; only the literal {task} marker is substituted).
    base_template = (
        "\nUse the following pieces of context to {task} at the end.\n"
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer.\n"
        "Keep the answer as concise as possible.\n"
        "{context}\n"
        "Question: {question}\n"
        "Helpful Answer:"
    )
    interview_prompt = PromptTemplate.from_template(
        base_template.replace("{task}", "answer the question")
    )
    report_prompt = PromptTemplate.from_template(
        base_template.replace("{task}", "generate a report")
    )

    def _qa_chain(prompt):
        # Both chains share llm/retriever/chain type; only the prompt varies.
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=documents_retriever,
            chain_type_kwargs={"prompt": prompt},
        )

    interview_chain = _qa_chain(interview_prompt)
    report_chain = _qa_chain(report_prompt)
    return interview_chain, report_chain, documents_retriever
def get_next_response(interview_chain, message, history, question_count):
    """Return the interviewer's next question for the candidate's message.

    After five questions the interview is closed with a hand-off message; if
    no knowledge base is loaded, an error string is returned instead. The
    *history* argument is accepted for interface compatibility but unused.
    """
    # Close the interview once the question budget is exhausted.
    if question_count >= 5:
        return "Thank you for your responses. I will now prepare a report."

    # Without a retrieval chain we cannot produce grounded questions.
    if not interview_chain:
        return "Error: Knowledge base not loaded. Please contact an admin."

    # Ask the RetrievalQA chain for the follow-up question.
    answer = interview_chain.invoke({"query": message})
    return answer.get("result", "Could you provide more details on that?")
def generate_report(report_chain, history, language):
    """Generate the final HR report from the interview history.

    Args:
        report_chain: RetrievalQA chain used to write the report; when falsy,
            a static fallback report is produced instead.
        history: Interview transcript — either a list of strings or a list of
            (user, assistant) message pairs (gradio-style).
        language: Language the report should be written in.

    Returns:
        The report text.
    """
    def _as_text(entry):
        # Accept plain strings, (user, assistant) pairs, or anything
        # stringifiable — `"\n".join(history)` would TypeError on non-strings.
        if isinstance(entry, str):
            return entry
        if isinstance(entry, (list, tuple)):
            return "\n".join(str(part) for part in entry)
        return str(entry)

    combined_history = "\n".join(_as_text(entry) for entry in history)

    # If report_chain is not available, return a fallback report.
    if not report_chain:
        print("[DEBUG] Report chain not available. Generating a fallback HR report.")
        fallback_report = f"""
HR Report in {language}:
Interview Summary:
{combined_history}
Assessment:
Based on the responses, the candidate's strengths, areas for improvement, and overall fit for the role have been noted. No additional knowledge-based insights due to missing vector database.
"""
        return fallback_report

    # Generate the report using the retrieval chain.
    result = report_chain.invoke({
        "query": f"Please provide an HR report based on the interview in {language}. Interview history: {combined_history}"
    })
    return result.get("result", "Unable to generate report due to insufficient information.")
def get_initial_question(interview_chain):
    """Produce the opening question of the HR interview.

    Falls back to a canned introduction prompt when no knowledge base chain
    is available or when the chain returns no result.
    """
    if not interview_chain:
        # No knowledge base loaded — use the canned opener.
        return (
            "Please introduce yourself and tell me a little bit about "
            "your professional background."
        )

    reply = interview_chain.invoke(
        {"query": "What should be the first question in an HR interview?"}
    )
    return reply.get(
        "result",
        "Could you tell me a little bit about yourself and your professional background?",
    )