File size: 2,970 Bytes
aa170f0
b515f84
 
e887c2a
b515f84
e887c2a
 
b515f84
 
080bbc9
b515f84
 
 
 
 
 
 
080bbc9
84485f7
 
e06901a
 
946ff7c
 
 
e06901a
4bbe73d
e06901a
4bbe73d
 
 
 
 
 
 
080bbc9
bf2279b
 
946ff7c
bf2279b
080bbc9
bf2279b
080bbc9
 
 
b515f84
 
 
 
 
 
 
 
 
 
 
 
b0cff56
 
b515f84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
080bbc9
12fb877
b515f84
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import gradio as gr

from langchain.document_loaders import WebBaseLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter kept for rebuilding the index from raw documents; in this file it
# is only referenced by the commented-out indexing code further down.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=350,
    chunk_overlap=10,
)

from langchain.llms import HuggingFaceHub

# NOTE: despite its name, ``model_id`` holds the LLM client object that is
# later handed to the QA chain, not a repository id string.
model_id = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.1, "max_new_tokens": 300},
)

from langchain.embeddings import HuggingFaceHubEmbeddings

# Embedding client constructed with no arguments (library defaults).
embeddings = HuggingFaceHubEmbeddings()

from langchain.vectorstores import Chroma

from langchain.chains import RetrievalQA

from langchain.prompts import ChatPromptTemplate

from utils import download_from_google_drive

#web_links = ["https://www.databricks.com/","https://help.databricks.com","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html"]
#loader = WebBaseLoader(web_links)
#documents = loader.load()

# gdown_file_id = os.getenv(gdown_file_id)

# download_from_google_drive(gdown_file_id)

# --- Vector store and QA chain setup (runs once at import time) -------------
# Downloads the pre-built Chroma index archive, unpacks it, opens it as a
# vector store, and wires a retriever over it into a RetrievalQA chain.
import zipfile

# The Google Drive file id comes from the "gdown_file_id" environment
# variable. The name must be passed as a string; the bare identifier used
# previously was undefined and raised NameError.
file_id = os.getenv("gdown_file_id")
if not file_id:
    raise RuntimeError(
        "Environment variable 'gdown_file_id' is not set; "
        "cannot download the Chroma database archive."
    )
download_from_google_drive(file_id)

# NOTE(review): assumes download_from_google_drive() saves the archive at this
# path -- confirm against utils.download_from_google_drive.
zip_file_path = "/content/chroma_db-complete.zip"
# Extract into the same directory Chroma opens below; previously the archive
# was unpacked to "/chroma_db" while Chroma read "./chroma_db".
# NOTE(review): assumes the archive holds the database files at its top level
# (no wrapping "chroma_db/" folder) -- confirm against the actual zip.
extract_path = "./chroma_db"
# Standard-library extraction; no `unzip_file` helper is imported in this file.
with zipfile.ZipFile(zip_file_path) as archive:
    archive.extractall(extract_path)

db = Chroma(persist_directory=extract_path, embedding_function=embeddings)
# Result is discarded; presumably a smoke test that the collection is readable.
db.get()
#texts = text_splitter.split_documents(documents)
#db = Chroma.from_documents(texts, embedding_function=embeddings)
retriever = db.as_retriever()

# Module-level chain used by infer(). A `global` statement is unnecessary at
# module scope, so it has been dropped.
qa = RetrievalQA.from_chain_type(
    llm=model_id,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

def add_text(history, text):
    """Append the user's message to the chat history as an unanswered turn.

    Args:
        history: Existing chat history (a list of message pairs).
        text: The message the user just submitted.

    Returns:
        A 2-tuple ``(new_history, "")``: a new list with ``(text, None)``
        appended (the input list is not modified), and an empty string used
        to clear the input textbox.
    """
    pending_turn = (text, None)
    return history + [pending_turn], ""

def bot(history):
    """Answer the most recent user message and record the reply in the history.

    Args:
        history: Chat history as a list of ``(user_message, bot_message)``
            pairs; the last pair holds the question still awaiting a reply.

    Returns:
        The same ``history`` list with its last pair replaced by
        ``[user_message, answer]``. An empty history is returned unchanged.
    """
    if not history:
        # Nothing to answer; avoids an IndexError on history[-1].
        return history

    user_message = history[-1][0]
    response = infer(user_message)
    # Replace the whole pair rather than assigning to index 1: add_text()
    # appends a tuple, which does not support item assignment.
    history[-1] = [user_message, response['result']]
    return history

def infer(question):
    """Send ``question`` to the module-level ``qa`` chain and return its output.

    The chain is called with a ``{"query": ...}`` mapping; the caller reads
    the answer from the ``'result'`` key of the returned value.
    """
    return qa({"query": question})

# Page-level CSS: constrains and centers the main column (elem_id "col-container").
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# Header HTML shown above the chat. The instructions describe the controls the
# page actually has (a question box submitted with Enter); the earlier text
# referred to a PDF upload and a "Load PDF" button that this UI does not provide.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with PDF</h1>
    <p style="text-align: center;">Type your question in the box below and hit Enter; <br />
    answers are generated from the documents in the pre-built index ;)</p>
</div>
"""


# Build the page: header, chat window, and a single question box.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        chatbot = gr.Chatbot([], elem_id="chatbot")
        with gr.Row():
            question = gr.Textbox(
                label="Question",
                placeholder="Type your question and hit Enter ",
            )

    # On Enter: first record the user's message and clear the box, then let
    # the bot fill in the answer for that pending turn.
    submit_event = question.submit(
        fn=add_text,
        inputs=[chatbot, question],
        outputs=[chatbot, question],
    )
    submit_event.then(fn=bot, inputs=chatbot, outputs=chatbot)

# Start the Gradio server.
demo.launch()