import os
import gradio as gr
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
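# NOTE: this splitter is only needed by the (commented-out) web-loader path further down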
text_splitter = RecursiveCharacterTextSplitter(chunk_size=350, chunk_overlap=10)
from langchain.llms import HuggingFaceHub
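# zephyr-7b-beta called remotely through the HF Hub inference API; the low temperature keeps answers close to the retrieved context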
model_id = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
from langchain.embeddings import HuggingFaceHubEmbeddings
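# Sentence embeddings are computed remotely via the HF Hub inference endpoint (library default model)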
embeddings = HuggingFaceHubEmbeddings()
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate
from utils import download_from_google_drive, unzip_file
from dotenv import load_dotenv
load_dotenv()
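# The Google Drive file id of the zipped Chroma DB is read from the `gdown_file_id` env var (set in .env)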
file_id = os.getenv("gdown_file_id", "")
print(file_id)
#web_links = ["https://www.databricks.com/","https://help.databricks.com","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html"]
#loader = WebBaseLoader(web_links)
#documents = loader.load()
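# Download the zipped Chroma DB from Google Drive and unpack it before opening it with Chroma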
download_from_google_drive(file_id)
zip_file_path = "gdown_chroma_db.zip"  # replace with your zip file path
extract_path = "/gdown_chroma_db"
embedding_db_location = extract_path  # Chroma reads from the same directory the archive is unpacked into
unzip_file(zip_file_path, extract_path)
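# Open the persisted Chroma store; the embedding function here has to match the one used when the DB was built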
db = Chroma(persist_directory=embedding_db_location, embedding_function=embeddings)
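# db.get() fetches the stored records once; the result is discarded, so it effectively acts as a quick check that the store loaded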
db.get()
#texts = text_splitter.split_documents(documents)
#db = Chroma.from_documents(texts, embedding_function=embeddings)
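# Expose the vector store as a retriever and wire it into a "stuff" RetrievalQA chain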
retriever = db.as_retriever()
qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, return_source_documents=True)
def add_text(history, text):
    # Append the user's message to the chat history and clear the input box
    history = history + [(text, None)]
    return history, ""

def bot(history):
    # Answer the latest user message and store the result as the bot's reply
    response = infer(history[-1][0])
    history[-1][1] = response['result']
    return history

def infer(question):
    # Run the RetrievalQA chain and return the full result dict (answer + source documents)
    result = qa({"query": question})
    return result
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chat with PDF</h1>
<p style="text-align: center;">The document index is loaded automatically from a pre-built Chroma DB, <br />
so once the app is up you can start asking questions about the pdf ;)</p>
</div>
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        chatbot = gr.Chatbot([], elem_id="chatbot")
        with gr.Row():
            question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
        # Show the user's message first, then fill in the bot's answer
        question.submit(add_text, [chatbot, question], [chatbot, question]).then(
            bot, chatbot, chatbot
        )

demo.launch()