andreinigo committed on
Commit
0799221
·
1 Parent(s): 2b729fc

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +100 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pinecone
2
+ from langchain.vectorstores import Pinecone
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.chains import ConversationalRetrievalChain
7
+ import os
8
+ from langchain.docstore.document import Document
9
+ from langchain.document_loaders import PyPDFLoader
10
+ import gradio as gr
11
+ import openai
12
+
# SECURITY: API credentials must never be committed to the repository.
# They are read from the environment instead (for example, configured as
# deployment secrets). Any key that was previously hard-coded here remains in
# the commit history and should be revoked and rotated.
def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing
            credential fails at start-up with a clear message.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# `openai` was imported before this point, so set the key on the module
# explicitly rather than relying on it having been picked up at import time.
openai.api_key = _require_env("OPENAI_API_KEY")

# initialize pinecone
pinecone.init(api_key=_require_env("PINECONE_API_KEY"),
              environment=os.environ.get("PINECONE_ENVIRONMENT", "us-east1-gcp"))
# delete the vectors in the index if there are any
index = pinecone.Index('asesura')
delete_response = index.delete(delete_all=True)
21
+
# Footer text that is stripped from every page before further processing.
_PAGE_FOOTER = "CÓDIGO CLAUSULADO - 01/01/2023 - 1411 - P - 35 - F-14-11-0090-214- D00I. CÓDIGO NOTA TÉCNICA – 01/01/2023 - 1411 - NT-P - 35 - N-14-11-0090-031"
# Pages shorter than this many characters are folded into a neighbour.
_MIN_PAGE_CHARS = 100


def _merge_short_pages(texts, min_chars=_MIN_PAGE_CHARS):
    """Fold pages shorter than *min_chars* into a neighbouring page.

    A short page is prepended to the page that follows it (repeatedly, until
    the accumulated text reaches *min_chars*). Short text left over at the end
    is appended to the last kept page instead of being discarded, and a page of
    exactly *min_chars* characters is kept.

    Args:
        texts: page texts in document order.
        min_chars: minimum length for a page to stand on its own.

    Returns:
        A new list of page texts, each at least *min_chars* long unless the
        whole input was shorter than that.
    """
    merged = []
    carry = ""
    for text in texts:
        text = carry + text
        if len(text) < min_chars:
            carry = text
        else:
            merged.append(text)
            carry = ""
    if carry.strip():
        # Trailing short text: keep it rather than silently dropping content.
        if merged:
            merged[-1] += carry
        else:
            merged.append(carry)
    return merged


# load the documents
loader = PyPDFLoader("SaludClasico2023.pdf")
pages = loader.load_and_split()

n_pages = [page.page_content.replace(_PAGE_FOOTER, "") for page in pages]
print(len(n_pages))
# If a page has less than 100 characters, append it to the next one
n_pages = _merge_short_pages(n_pages)
35
+
def _summarize_page(text):
    """Ask GPT-4 for a summary of *text* and return it stripped of whitespace.

    The system prompt instructs the model to answer in the same language as
    the input text.
    """
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "Summarize this text in the same language as the user's input."},
            {"role": "user", "content": text},
        ],
        temperature=.2,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response["choices"][0]["message"]['content'].strip()


# summarize each page (one chat-completion request per page)
pages_summarized = [_summarize_page(chunk) for chunk in n_pages]

# Wrap each summary in a Document. Note that "page" is the position of the
# summary in `pages_summarized`, not the page number in the original PDF.
pages = [
    Document(page_content=page_summarized, metadata={"page": i})
    for i, page_summarized in enumerate(pages_summarized)
]
57
+
58
+
# split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(pages)

embeddings = OpenAIEmbeddings()

# NOTE: pinecone is already initialised near the top of this script, so the
# second pinecone.init(...) call that used to sit here (and repeated a
# hard-coded API key) has been removed.
index_name = "asesura"

# create the new vectors
docsearch = Pinecone.from_documents(
    documents, embeddings, index_name=index_name)

# create the conversational chain
qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0),
    docsearch.as_retriever(),
    return_source_documents=True,
)
80
+
81
+
def predict(query: str) -> str:
    """Answer a user's question about the insurance policy.

    The question is first rewritten by GPT-4 (spelling, grammar, punctuation),
    then the rewritten question is passed to the retrieval chain ``qa`` with an
    empty chat history.

    Args:
        query: the question as typed by the user.

    Returns:
        The chain's answer text, or a short prompt asking for a question when
        *query* is empty or only whitespace (no model call is made then).
    """
    # Guard: don't spend two model calls on an empty submission.
    if query is None or not query.strip():
        return "Por favor, escribe una pregunta."

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "system", "content": "Change the user's question so it is correctly made, with the correct punctuation, grammar, and spelling. The question is being made to an insurance policy called 'Sura Seguro de Salud Clásico' by a company named Sura. Return the question in the same language as the user."},
                  {"role": "user", "content": query}],
        temperature=0.1)
    refined_q = completion.choices[0].message.content
    print(refined_q)

    # Each call is independent: no conversation history is carried over.
    result = qa({"question": refined_q, "chat_history": []})
    return result['answer']
93
+
94
+
# Gradio front end: a single question box in, a single answer box out.
pregunta = gr.Textbox(placeholder="Escribe tu pregunta", label="Pregunta")
respuesta = gr.Textbox(label="Respuesta")

# Build the interface around `predict` and start serving it.
gr.Interface(
    predict,
    pregunta,
    respuesta,
    title="Asesura",
    description="Bienvenido a tu asesor personal de seguros Sura. Pregunta lo que necesites saber sobre el Seguro de Salud Clásico de Sura 2023",
).launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
# app.py imports from the pre-0.1 langchain module layout
# (langchain.vectorstores, langchain.chat_models, ...).
langchain<0.1
# pinecone.init(...) only exists in the 2.x client.
pinecone-client<3
# openai.ChatCompletion was removed in openai 1.0.
openai<1
tiktoken
# Required by langchain's PyPDFLoader.
pypdf