andreinigo committed on
Commit
2c7c144
·
1 Parent(s): 76e7bed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -19,19 +19,27 @@ def proper_query(query):
19
  response = openai.Completion.create(
20
  engine="text-davinci-003", prompt=prompt, max_tokens=1000, temperature=0.3)
21
  return response.choices[0].text
22
-
23
- embeddings = OpenAIEmbeddings()
24
- #transform a column of a csv into a list
25
- df = pd.read_csv('reglamento-avianca.csv')
26
- text = df['text'].tolist()
 
 
 
 
 
 
 
27
 
28
  text_splitter = RecursiveCharacterTextSplitter(
29
- # Set a really small chunk size, just to show.
30
- chunk_size = 1000,
31
- chunk_overlap = 0,
32
- length_function = len,
33
  )
34
- texts = text_splitter.split_text(text)
 
 
 
35
 
36
  docsearch = FAISS.from_texts(texts, embeddings)
37
 
 
19
  response = openai.Completion.create(
20
  engine="text-davinci-003", prompt=prompt, max_tokens=1000, temperature=0.3)
21
  return response.choices[0].text
22
+
23
+
24
+ def ingest_docs():
25
+ """Get documents from the input folder"""
26
+ #loader = ReadTheDocsLoader("input/reglamento-avianca.txt")
27
+ with open('input/reglamento-avianca.txt', 'r', encoding="utf-8") as file:
28
+ text = file.read()
29
+ document_split = text.split('\"\n\"\n')
30
+ docs = []
31
+ metadatas = []
32
+ for i in range(len(document_split)):
33
+ docs.append(document_split[i])
34
 
35
  text_splitter = RecursiveCharacterTextSplitter(
36
+ chunk_size=1000,
37
+ chunk_overlap=0,
 
 
38
  )
39
+
40
+ embeddings = OpenAIEmbeddings()
41
+
42
+ texts = text_splitter.split_text(docs)
43
 
44
  docsearch = FAISS.from_texts(texts, embeddings)
45