Spaces:
Sleeping
Sleeping
import streamlit as st | |
import os | |
import zipfile | |
import pandas as pd | |
from langchain.document_loaders import DataFrameLoader | |
#import tiktoken | |
from langchain.vectorstores import Chroma | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings | |
from langchain_core.messages import HumanMessage, SystemMessage | |
from langchain_openai import ChatOpenAI | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
from bs4 import BeautifulSoup | |
import requests | |
# Function to load vector database | |
def load_vector_db(zip_file_path, extract_path):
    """Unpack the zipped Chroma store and open it.

    Args:
        zip_file_path: Path to the zip archive holding the persisted store.
        extract_path: Directory the archive is extracted into; also passed
            to Chroma as its ``persist_directory``.

    Returns:
        The ``Chroma`` vector store, configured with the
        ``all-MiniLM-L6-v2`` sentence-transformer embeddings.
    """
    with st.spinner("Loading vector store..."):
        # Extract first so Chroma finds its files under extract_path.
        with zipfile.ZipFile(zip_file_path, 'r') as archive:
            archive.extractall(extract_path)
        store = Chroma(
            persist_directory=extract_path,
            embedding_function=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2"),
        )
    st.success("Vector store loaded")
    return store
# # Function to augment prompt | |
# def augment_prompt(query, vectordb): | |
# results = vectordb.similarity_search(query, k=10) | |
# source_knowledge = "\n".join([x.page_content for x in results]) | |
# augmented_prompt = f""" | |
# You are an AI assistant. Use the context provided below to answer the question as comprehensively as possible. | |
# If the answer is not contained within the context, respond politely that you cannot provide that information. | |
# Context: | |
# {source_knowledge} | |
# Question: {query} | |
# """ | |
# return augmented_prompt | |
# Function to augment prompt | |
def augment_prompt(query, vectordb, search_results, k=5):
    """Build the retrieval-augmented prompt sent to the chat model.

    Args:
        query: The user's question; used for the similarity search and
            repeated at the end of the prompt.
        vectordb: Object exposing ``similarity_search(query, k=...)`` whose
            results each have a ``page_content`` attribute.
        search_results: Text gathered from the web search, inserted verbatim
            under "Additional Web Search Results".
        k: Number of documents to retrieve from ``vectordb``. Defaults to 5,
            the value that was previously hard-coded.

    Returns:
        The complete prompt string.
    """
    documents = vectordb.similarity_search(query, k=k)
    source_knowledge = "\n".join(doc.page_content for doc in documents)
    # Assembled line by line so the prompt text does not pick up the
    # function's source indentation.
    return (
        "\n"
        "You are an AI assistant. Use the context provided below to answer the question as comprehensively as possible.\n"
        "If the answer is not contained within the context, respond with \"I don't know\".\n"
        "Context:\n"
        f"{source_knowledge}\n"
        "Additional Web Search Results:\n"
        f"{search_results}\n"
        f"Question: {query}\n"
    )
# Function to handle chat with OpenAI | |
def chat_with_openai(query, vectordb, openai_api_key, search_results):
    """Answer ``query`` with gpt-3.5-turbo using retrieved and web context.

    Args:
        query: The user's question.
        vectordb: Vector store passed through to ``augment_prompt``.
        openai_api_key: API key used to authenticate with OpenAI.
        search_results: Web-search text passed through to ``augment_prompt``.

    Returns:
        The text content of the model's reply.
    """
    # 30-second request timeout.
    chat = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai_api_key, timeout=30)
    augmented_query = augment_prompt(query, vectordb, search_results)
    messages = [
        SystemMessage(content="You are a helpful assistant."),
        HumanMessage(content=augmented_query),
    ]
    # Calling the model object directly (chat(messages)) is deprecated in
    # LangChain; invoke() is the supported entry point.
    res = chat.invoke(messages)
    return res.content
# Function to perform web search | |
def _node_text(parent, tag, css_class):
    """Return the stripped text of the first matching child, or 'N/A'."""
    node = parent.find(tag, {'class': css_class})
    if node is None:
        return 'N/A'
    return node.text.strip()


def _scrape_listings(url, params, headers, container, fields, label, limit=5, timeout=10):
    """Fetch one job-board page and format up to ``limit`` listings as text.

    ``container`` is a ``(tag, css_class)`` pair selecting each listing card;
    ``fields`` is three such pairs for title, company and location.
    Returns an empty string when the request fails or is not HTTP 200.
    """
    try:
        # Passing the query via params lets requests URL-encode it.
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Web results are supplementary context; an unreachable site should
        # not abort the whole chat turn.
        return ""
    if response.status_code != 200:
        return ""
    soup = BeautifulSoup(response.text, 'html.parser')
    container_tag, container_class = container
    cards = soup.find_all(container_tag, {'class': container_class})
    lines = []
    for card in cards[:limit]:
        title, company, location = (_node_text(card, tag, css) for tag, css in fields)
        lines.append(f"{label} Result: {title} at {company}, {location}\n")
    return "".join(lines)


def perform_web_search(query):
    """Scrape Glassdoor and Indeed for job listings matching ``query``.

    Args:
        query: Free-text search terms entered by the user.

    Returns:
        A string with one line per listing (at most 5 per site), of the form
        ``"<Site> Result: <title> at <company>, <location>"``. Missing fields
        are rendered as ``N/A``. Empty if neither site yields results.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    # Glassdoor search
    glassdoor_results = _scrape_listings(
        "https://www.glassdoor.com/Search/results.htm",
        {"keyword": query},
        headers,
        container=('div', 'jobContainer'),
        fields=(
            ('a', 'jobInfoItem jobTitle'),
            ('div', 'jobInfoItem jobEmpolyerName'),
            ('span', 'subtle loc'),
        ),
        label="Glassdoor",
    )
    # Indeed search
    indeed_results = _scrape_listings(
        "https://www.indeed.com/jobs",
        {"q": query, "limit": 10},
        headers,
        container=('div', 'jobsearch-SerpJobCard'),
        fields=(
            ('h2', 'title'),
            ('span', 'company'),
            ('span', 'location'),
        ),
        label="Indeed",
    )
    return glassdoor_results + indeed_results
# Streamlit UI | |
st.title("Data Roles Company Finder Chatbot")
st.write("This app helps users find companies hiring for data roles, providing information such as job title, salary estimate, job description, company rating, and more.")

# Open the vector store from the bundled archive.
zip_file_path = "chroma_db_compressed_.zip"
extract_path = "./chroma_db_extracted"
vectordb = load_vector_db(zip_file_path, extract_path)

# Start with an empty transcript if the session has none yet.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the stored turns so the conversation stays visible.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle a newly submitted question, if any.
prompt = st.chat_input("Enter your query")
if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Gather web context, then ask the model.
    search_results = perform_web_search(prompt)
    openai_api_key = st.secrets["OPENAI_API_KEY"]
    response = chat_with_openai(prompt, vectordb, openai_api_key, search_results)

    # Show the answer and record it in the transcript.
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
# # Function to handle chat with OpenAI | |
# def chat_with_openai(query, vectordb, openai_api_key): | |
# chat = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai_api_key) | |
# augmented_query = augment_prompt(query, vectordb) | |
# prompt = HumanMessage(content=augmented_query) | |
# messages = [ | |
# SystemMessage(content="You are a helpful assistant."), | |
# prompt | |
# ] | |
# res = chat(messages) | |
# return res.content | |
# # Streamlit UI | |
# st.title("Data Roles Company Finder Chatbot") | |
# st.write("This app helps users find companies hiring for data roles, providing information such as job title, salary estimate, job description, company rating, and more.") | |
# # Load vector database | |
# zip_file_path = "chroma_db_compressed_.zip" | |
# extract_path = "./chroma_db_extracted" | |
# vectordb = load_vector_db(zip_file_path, extract_path) | |
# # Initialize session state for chat history | |
# if "messages" not in st.session_state: | |
# st.session_state.messages = [] | |
# # Display chat history | |
# for message in st.session_state.messages: | |
# with st.chat_message(message["role"]): | |
# st.markdown(message["content"]) | |
# # User input | |
# if prompt := st.chat_input("Enter your query"): | |
# st.session_state.messages.append({"role": "user", "content": prompt}) | |
# with st.chat_message("user"): | |
# st.markdown(prompt) | |
# with st.chat_message("assistant"): | |
# openai_api_key = st.secrets["OPENAI_API_KEY"] | |
# response = chat_with_openai(prompt, vectordb, openai_api_key) | |
# st.markdown(response) | |
# st.session_state.messages.append({"role": "assistant", "content": response}) | |
# # Query input | |
# query = st.text_input("Enter your query", "") | |
# if st.button("Send"): | |
# if query: | |
# # Add user query to chat history | |
# st.session_state.messages.append({"role": "user", "content": query}) | |
# with st.chat_message("user"): | |
# st.markdown(query) | |
# # Chat with OpenAI | |
# openai_api_key = st.secrets["OPENAI_API_KEY"] | |
# response = chat_with_openai(query, vectordb, openai_api_key) | |
# # Add AI response to chat history | |
# st.session_state.messages.append({"role": "assistant", "content": response}) | |
# with st.chat_message("assistant"): | |
# st.markdown(response) | |
# # Streamlit UI | |
# st.title("Document Processing and AI Chat with LangChain") | |
# # Load vector database | |
# zip_file_path = "chroma_db_compressed_.zip" | |
# extract_path = "./chroma_db_extracted" | |
# vectordb = load_vector_db(zip_file_path, extract_path) | |
# # Query input | |
# query = st.text_input("Enter your query", "List three companies where I can work as a business analyst with their location and salary") | |
# if st.button("Get Answer"): | |
# # Chat with OpenAI | |
# openai_api_key = st.secrets["OPENAI_API_KEY"] | |
# response = chat_with_openai(query, vectordb, openai_api_key) | |
# st.write("Response from AI:") | |
# st.write(response) | |
# # Streamlit UI | |
# st.title("Data Roles Company Finder Chatbot") | |
# st.write("This app helps users find companies hiring for data roles, providing information such as job title, salary estimate, job description, company rating, and more.") | |
# # Load vector database | |
# zip_file_path = "chroma_db_compressed_.zip" | |
# extract_path = "./chroma_db_extracted" | |
# vectordb = load_vector_db(zip_file_path, extract_path) | |
# # Initialize session state for chat history | |
# if "messages" not in st.session_state: | |
# st.session_state.messages = [ | |
# SystemMessage(content="You are a helpful assistant.") | |
# ] | |
# # Display chat history | |
# for message in st.session_state.messages: | |
# if isinstance(message, HumanMessage): | |
# st.write(f"You: {message.content}") | |
# else: | |
# st.write(f"AI: {message.content}") | |
# # Query input | |
# query = st.text_input("Enter your query", "List three companies where I can work as a business analyst with their location and salary") | |
# if st.button("Send"): | |
# if query: | |
# # Add user query to chat history | |
# st.session_state.messages.append(HumanMessage(content=query)) | |
# # Chat with OpenAI | |
# openai_api_key = st.secrets["OPENAI_API_KEY"] | |
# response = chat_with_openai(query, vectordb, openai_api_key) | |
# # Add AI response to chat history | |
# st.session_state.messages.append(SystemMessage(content=response)) | |
# # Display chat history | |
# for message in st.session_state.messages: | |
# if isinstance(message, HumanMessage): | |
# st.write(f"You: {message.content}") | |
# else: | |
# st.write(f"AI: {message.content}") |