import zipfile

import streamlit as st
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

# Only needed for the commented-out local Gemma path below:
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the vector database: unzip the persisted Chroma store and open it with the
# same embedding model it was built with.
def load_vector_db(zip_file_path, extract_path):
    with st.spinner("Loading vector store..."):
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
        embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
        vectordb = Chroma(
            persist_directory=extract_path,
            embedding_function=embedding_function
        )
    st.success("Vector store loaded")
    return vectordb
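# Note: the zip is assumed to contain a Chroma store persisted with the same
# "all-MiniLM-L6-v2" embeddings. A hypothetical build step (names illustrative):
#   db = Chroma.from_documents(docs, embedding_function, persist_directory="chroma_db")
#   db.persist()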
# Augment the user's query with the top-k most similar documents from the store.
def augment_prompt(query, vectordb):
    results = vectordb.similarity_search(query, k=10)
    source_knowledge = "\n".join([x.page_content for x in results])
    augmented_prompt = f"""
You are an AI assistant. Use the context provided below to answer the question as comprehensively as possible.
If the answer is not contained within the context, respond politely that you cannot provide that information.

Context:
{source_knowledge}

Question: {query}
"""
    return augmented_prompt
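# If weak matches need filtering, Chroma also exposes scored retrieval, where the
# score is a distance (lower is more similar). A sketch, with an illustrative cutoff:
#   scored = vectordb.similarity_search_with_score(query, k=10)
#   results = [doc for doc, score in scored if score < 0.5]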
# Answer a query with OpenAI chat completions, grounded in the retrieved context.
def chat_with_openai(query, vectordb, openai_api_key):
    chat = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai_api_key)
    augmented_query = augment_prompt(query, vectordb)
    prompt = HumanMessage(content=augmented_query)
    messages = [
        SystemMessage(content="You are a helpful assistant."),
        prompt
    ]
    res = chat.invoke(messages)  # .invoke() replaces the deprecated chat(messages) call
    return res.content
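# Example usage (assumes the vector store is loaded and OPENAI_API_KEY is set in
# .streamlit/secrets.toml):
#   answer = chat_with_openai("Which companies hire business analysts?",
#                             vectordb, st.secrets["OPENAI_API_KEY"])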
# Alternative: chat with a local open-source LLM (Google Gemma); kept for reference.
# def chat_with_google_llm(query, vectordb, tokenizer, model):
#     augmented_query = augment_prompt(query, vectordb)
#     inputs = tokenizer(augmented_query, return_tensors="pt")  # .to("cuda")
#     # max_new_tokens bounds only the generated text; max_length would also count
#     # the (long) augmented prompt and could cut generation off immediately.
#     outputs = model.generate(inputs.input_ids, max_new_tokens=512,
#                              num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     return response
# Streamlit UI
st.title("Data Roles Company Finder Chatbot")
st.write("This app helps users find companies hiring for data roles, providing information such as job title, salary estimate, job description, company rating, and more.")

# Load the vector database
zip_file_path = "chroma_db_compressed_.zip"
extract_path = "./chroma_db_extracted"
vectordb = load_vector_db(zip_file_path, extract_path)
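# Note: Streamlit re-runs this whole script on every interaction, so the store is
# re-extracted and re-opened on each message. Decorating the loader (sketch) avoids that:
#   @st.cache_resource
#   def load_vector_db(zip_file_path, extract_path):
#       ...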
# # Load the Gemma model and tokenizer (disabled; the model is gated on Hugging Face
# # and requires accepting its license):
# tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
# model = AutoModelForCausalLM.from_pretrained(
#     "google/gemma-2b-it",
#     torch_dtype=torch.bfloat16
# )  # .to("cuda")
# Initialize session state for chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle user input
if prompt := st.chat_input("Enter your query"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        openai_api_key = st.secrets["OPENAI_API_KEY"]
        response = chat_with_openai(prompt, vectordb, openai_api_key)
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
# # Same input loop, but answering with the local Gemma model (disabled):
# if prompt := st.chat_input("Enter your query"):
#     st.session_state.messages.append({"role": "user", "content": prompt})
#     with st.chat_message("user"):
#         st.markdown(prompt)
#     with st.chat_message("assistant"):
#         response = chat_with_google_llm(prompt, vectordb, tokenizer, model)
#         st.markdown(response)
#     st.session_state.messages.append({"role": "assistant", "content": response})