andreinigo committed on
Commit
7a96a6f
·
1 Parent(s): a659f52

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +103 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ import pandas as pd
3
+ import os
4
+ import gradio as gr
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain.vectorstores.faiss import FAISS
8
+ from langchain.docstore.document import Document
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain.chains.question_answering import load_qa_chain
11
+ from langchain.llms import OpenAI
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ import openai
14
+
15
def proper_query(query):
    """Rewrite a user question as well-formed Spanish via an OpenAI completion.

    Args:
        query: Raw question text typed by the user.

    Returns:
        The model's corrected question with surrounding whitespace removed.
        ``query`` itself is returned unchanged when it is empty or
        whitespace-only, or when the model produces no usable text.
    """
    # Nothing to correct: skip the API round-trip for blank input.
    if not query or not query.strip():
        return query

    prompt = f"El siguiente texto es una pregunta en español: {query}\n\n¿Cómo debería ser la pregunta para que sea correcta en español?\nPregunta corregida:"
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1000,
        temperature=0.2,
    )
    # The completion continues right after "Pregunta corregida:", so it may
    # carry leading/trailing whitespace; strip it so it does not leak into
    # the similarity search or the downstream prompts.
    corrected = response.choices[0].text.strip()
    # Never replace the user's question with an empty string.
    return corrected or query
19
+
20
def extract_text_from_pdf(file_path, splitter="\n\n"):
    """Read a PDF and cut its text into chunks that each start with ``splitter``.

    Args:
        file_path: Path of the PDF file to read.
        splitter: Delimiter string marking the beginning of every chunk.

    Returns:
        A list of strings, one per occurrence of ``splitter`` in the document
        text, each prefixed with ``splitter``. Any text that precedes the
        first occurrence is discarded, so the list is empty when ``splitter``
        never occurs.
    """
    with open(file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Pages must be extracted while the underlying file is still open.
        page_texts = [page.extract_text() for page in reader.pages]

    full_text = ''.join(page_texts)
    pieces = full_text.split(splitter)
    # pieces[0] is whatever came before the first delimiter; it is dropped.
    # The delimiter is re-attached because str.split() removes it.
    return [splitter + piece for piece in pieces[1:]]
33
+
34
# Embedding model used to vectorise every chunk stored in the FAISS index.
embeddings = OpenAIEmbeddings()

# NOTE: despite its name, `text` is a *list* of chunks, one per 'ARTÍCULO'
# heading found in the PDF (see extract_text_from_pdf).
text = extract_text_from_pdf('transito-dgo.pdf', 'ARTÍCULO')

# Chunks are measured in characters (length_function=len), capped at 500,
# with no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
)

# split_text() takes a single string, while `text` is a list of article
# chunks. Split every article on its own instead of handing the splitter the
# whole list, so no piece ever spans two articles.
texts = [
    piece
    for article in text
    for piece in text_splitter.split_text(article)
]

# In-memory vector index queried by asesor_transito().
docsearch = FAISS.from_texts(texts, embeddings)
47
+
48
def asesor_transito(query):
    """Answer a question using the indexed regulation chunks.

    The question is first rewritten by ``proper_query``, then the chunks most
    similar to it are fetched from ``docsearch`` and fed to a LangChain
    "refine" question-answering chain: an initial answer is produced from the
    first chunk and then optionally refined with each following chunk.

    Args:
        query: Question text as typed by the user.

    Returns:
        The chain's final answer (its ``output_text``).
    """
    # NOTE(review): proper_query asks the model for a *Spanish* rewrite, so a
    # non-Spanish question presumably reaches the chain in Spanish and the
    # "same language" instruction below then applies to the rewritten text.
    # Confirm this is intended.
    query = proper_query(query)
    docs = docsearch.similarity_search(query)

    # Adjacent literals are concatenated verbatim, so every fragment must
    # carry its own separating space or newline.
    refine_prompt_template = (
        "The original question is as follows: {question}\n"
        "We have provided an existing answer: {existing_answer}\n"
        "You have the opportunity to refine the existing answer, "
        "only if needed, exclusively with the context below.\n"
        "------------\n"
        "{context_str}\n"
        "------------\n"
        "If that context is not helpful to answer the question, then omit it.\n"
        "Shorten the answer if possible.\n"
        "Reply in the same language as the question.\n"
        "If the context is not helpful to answer the question or if it is not a question, then you will refuse to answer.\n"
        "Answer:"
    )
    refine_prompt = PromptTemplate(
        input_variables=["question", "existing_answer", "context_str"],
        template=refine_prompt_template,
    )

    initial_qa_template = (
        "Context information is below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given the context information and not prior knowledge, "
        "answer the question: {question}\n"
        "If the context is not helpful to answer the question or if it is not a question, then you will refuse to answer.\n"
    )
    initial_qa_prompt = PromptTemplate(
        input_variables=["context_str", "question"],
        template=initial_qa_template,
    )

    # temperature=0 is passed to the LLM; intermediate refine steps are not
    # returned, only the final answer.
    chain = load_qa_chain(
        OpenAI(temperature=0),
        chain_type="refine",
        return_refine_steps=False,
        question_prompt=initial_qa_prompt,
        refine_prompt=refine_prompt,
    )
    result = chain(
        {"input_documents": docs, "question": query},
        return_only_outputs=True,
    )
    return result['output_text']
87
+
88
# Gradio UI: one free-text question box wired to asesor_transito, one answer
# box. Labels are bilingual (Spanish / English).
demo = gr.Interface(
    fn=asesor_transito,
    inputs=[
        # The English sentence previously said "Mexico City's transit law",
        # contradicting the rest of the app, which is about Durango.
        gr.Textbox(label="Hola soy tu asesor personal de tránsito de Durango, ¿cuál es tu pregunta? \nHi, I am your Durango transit law personal assistant, ask me anything about Durango's transit law in any language.", lines=3,),
    ],
    outputs=[gr.Textbox(label="Respuesta: \nAnswer: ")],
    title="Asesor de Reglamento de Tránsito Durango",
    # Sample questions offered in the UI.
    examples=[
        ["cuál es la multa por no llevar casco?"],
        ["qué pasa si no tengo licencia de conducir?"],
        ["What would happen if I drove under the influence of alcohol?"]
    ],
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ openai==0.25.0
2
+ matplotlib==3.6.2
3
+ numpy==1.23.5
4
+ PyPDF2==3.0.1
5
+ langchain==0.0.68
6
+ zipfile36==0.1.3
7
+ faiss-cpu==1.7.3