from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from langchain.vectorstores import Chroma
from PyPDF2 import PdfWriter
import gradio as gr
import os
from dotenv import load_dotenv
import openai
load_dotenv()
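# The Space secret "my_secret" is assumed to hold the OpenAI API key; expose it as OPENAI_API_KEY.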
os.environ["OPENAI_API_KEY"] = os.environ['my_secret']
loader = PyPDFLoader("/home/user/app/docs.pdf")
documents = loader.load()
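# Split the PDF text into chunks of up to roughly 800 characters (CharacterTextSplitter's
# default separator is "\n\n"), with no overlap between consecutive chunks.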
text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# Embed the chunks and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings()
vector_store = Chroma.from_documents(texts, embeddings)
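# The retriever returns the 2 chunks most similar to each query (k=2).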
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
from langchain.chains import RetrievalQAWithSourcesChain

llm = ChatOpenAI(model_name="gpt-4", temperature=0)  # Swap model_name (e.g. "gpt-3.5-turbo") if GPT-4 is not available.
# Baseline QA chain; it is rebuilt below with a custom prompt.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True)
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
system_template = """You are a chatbot that advises public officials on how to respond to complaints. Use the following pieces of context to answer the user's question concisely.
Given the following summaries of a long document and a question, create a final answer with references ("SOURCES"); use "SOURCES" in capital letters regardless of the number of sources.
If you don't know the answer, just say "I don't know"; don't try to make up an answer.
----------------
{summaries}

You MUST answer in Korean and in Markdown format:"""
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
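# Pass the custom prompt to the chain; with the "stuff" chain type, the retrieved chunks are
# concatenated and substituted for the {summaries} placeholder in the system template.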
chain_type_kwargs = {"prompt": prompt}
llm = ChatOpenAI(model_name="gpt-4", temperature=0)  # Swap model_name (e.g. "gpt-3.5-turbo") if GPT-4 is not available.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)
query = "행복한 인생이란?"  # Sample query in Korean: "What is a happy life?"
result = chain(query)
# With return_source_documents=True, the result holds the answer plus the chunks it was based on.
for doc in result['source_documents']:
    print('내용 : ' + doc.page_content[0:100].replace('\n', ' '))  # content
    print('파일 : ' + doc.metadata['source'])  # file
    print('페이지 : ' + str(doc.metadata['page']))  # page
def respond(message, chat_history):  # Define the function that handles the chatbot's response.
    result = chain(message)
    bot_message = result['answer']
    # Append numbered source references (file and page) to the answer.
    for i, doc in enumerate(result['source_documents']):
        bot_message += '[' + str(i+1) + '] ' + doc.metadata['source'] + '(' + str(doc.metadata['page']) + ') '
    chat_history.append((message, bot_message))  # Add the user's message and the bot's response to the chat history.
    return "", chat_history  # Clear the textbox and return the updated chat history.
with gr.Blocks(theme='gstaff/sketch') as demo:  # Build the interface with gr.Blocks().
    # Greeting (Korean): "Stay calm. This is a chatbot that tells you how to respond to difficult civil complaints."
    gr.Markdown("# 침착하세요. \n 어려운 민원에 대응하는 방법을 알려주는 챗봇입니다.")
    chatbot = gr.Chatbot(label="채팅창")  # Chatbot component labeled '채팅창' (chat window).
    msg = gr.Textbox(label="입력")  # Textbox labeled '입력' (input).
    clear = gr.Button("초기화")  # Button labeled '초기화' (reset).
    msg.submit(respond, [msg, chatbot], [msg, chatbot])  # Submitting the textbox calls respond.
    clear.click(lambda: None, None, chatbot, queue=False)  # The '초기화' button clears the chat history.

demo.launch(debug=True)  # Launch the app; users submit messages via '입력' and can reset the chat with '초기화'.