File size: 3,138 Bytes
aa170f0
b515f84
 
e887c2a
b515f84
e887c2a
 
b515f84
 
080bbc9
b515f84
 
 
 
 
 
 
080bbc9
84485f7
 
c4ca36a
e06901a
6ca9efb
 
 
 
 
 
 
 
 
 
946ff7c
 
 
e06901a
4bbe73d
e06901a
4bbe73d
 
6ca9efb
4bbe73d
d43c85e
c4ca36a
 
 
 
4bbe73d
080bbc9
c4ca36a
bf2279b
946ff7c
bf2279b
080bbc9
bf2279b
080bbc9
 
 
b515f84
 
 
 
 
 
 
 
 
 
 
 
b0cff56
 
b515f84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
080bbc9
12fb877
b515f84
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import gradio as gr

from langchain.document_loaders import WebBaseLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitter is only needed by the commented-out web-ingestion path below;
# the live code path loads a prebuilt Chroma index instead.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=350, chunk_overlap=10)

from langchain.llms import HuggingFaceHub
# Remote HF Inference endpoint used as the answering LLM.
# Low temperature keeps answers close to the retrieved context.
model_id = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})

from langchain.embeddings import HuggingFaceHubEmbeddings
# Default HF embedding endpoint; must match the model used when the
# persisted Chroma DB was built, or similarity search will be meaningless.
embeddings = HuggingFaceHubEmbeddings()

from langchain.vectorstores import Chroma

from langchain.chains import RetrievalQA

from langchain.prompts import ChatPromptTemplate

from utils import download_from_google_drive, unzip_file

from dotenv import load_dotenv


# Pull HF tokens / gdown_file_id from a local .env file into the environment.
load_dotenv()

# Google Drive file id of the zipped, pre-built Chroma DB; empty string if unset.
file_id = os.getenv("gdown_file_id", "")

print(file_id)

#web_links = ["https://www.databricks.com/","https://help.databricks.com","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html"]
#loader = WebBaseLoader(web_links)
#documents = loader.load()

# gdown_file_id = os.getenv(gdown_file_id)

# download_from_google_drive(gdown_file_id)

# file_id = os.getenv(gdown_file_id)  # Replace with your file ID
# Download the zipped vector store and unpack it to an absolute path ("/..."),
# which the Space's container can write to. NOTE(review): presumably
# download_from_google_drive saves to "gdown_chroma_db.zip" — confirm in utils.
download_from_google_drive(file_id)
zip_file_path = "gdown_chroma_db.zip"  # Replace with your zip file path
extract_path = "/gdown_chroma_db"

embedding_db_location = "/gdown_chroma_db"

unzip_file(zip_file_path,extract_path)
     
# Open the persisted Chroma collection with the same embedding function.
db = Chroma(persist_directory=embedding_db_location, embedding_function=embeddings)
db.get()
#texts = text_splitter.split_documents(documents)
#db = Chroma.from_documents(texts, embedding_function=embeddings)
retriever = db.as_retriever()

# NOTE(review): `global` at module scope is a no-op; qa is module-level anyway.
global qa 
# "stuff" chain: all retrieved chunks are concatenated into a single prompt.
qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, return_source_documents=True)


def add_text(history, text):
    """Append the user's message (with no bot reply yet) to the chat history.

    Returns the updated history plus an empty string, which Gradio uses to
    clear the input textbox after submission.
    """
    updated = [*history, (text, None)]
    return updated, ""

def bot(history):
    """Answer the latest user message and write the reply into the history.

    Assumes Gradio delivers history entries as mutable [user, bot] pairs —
    item assignment would fail on plain tuples (TODO confirm against the
    Gradio version in use).
    """
    latest_question = history[-1][0]
    answer = infer(latest_question)
    history[-1][1] = answer['result']
    return history

def infer(question):
    """Run the module-level RetrievalQA chain on *question*.

    Returns the chain's raw result dict (includes 'result' and, because the
    chain was built with return_source_documents=True, 'source_documents').
    """
    return qa({"query": question})

# CSS limiting the app to a centered 700px column.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# Header HTML injected into the page.
# NOTE(review): the text describes a PDF-upload workflow, but this app loads a
# prebuilt Chroma DB and has no upload control — looks like leftover template
# copy; confirm and reword if so (runtime string, so not changed here).
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with PDF</h1>
    <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
    when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""


# Build the Gradio UI: a header, a chat window, and a question textbox.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)      
        chatbot = gr.Chatbot([], elem_id="chatbot")
        with gr.Row():
            question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
    # On Enter: add_text appends the question and clears the box, then bot
    # fills in the answer for the newest entry.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

# Blocking call: starts the web server (module must be run as a script).
demo.launch()