daneshjoy committed on
Commit
eab155e
·
1 Parent(s): 4620fde

Try to fix SQL error

Browse files
Files changed (1) hide show
  1. app.py +60 -60
app.py CHANGED
@@ -9,65 +9,6 @@ doc_dir = "data/wiki_gameofthrones_txt12"
9
  sql_file = 'faiss_doc_store.db'
10
  faiss_file = 'faiss_index.faiss'
11
 
12
- # %% ------------------------------------------- Creating Doc store
13
- if not os.path.exists(sql_file) or not os.path.exists(faiss_file):
14
- from haystack.document_stores import FAISSDocumentStore
15
- from haystack.nodes import DensePassageRetriever
16
- from haystack.utils import convert_files_to_docs, clean_wiki_text
17
-
18
-
19
- module_dir = os.path.dirname(os.path.abspath(__file__))
20
- os.chdir(module_dir)
21
-
22
-
23
-
24
- # %% Download/Load Docs
25
-
26
- # Get some files that we want to use
27
- # s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt12.zip"
28
- # fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
29
-
30
- print('---> Loading Documents ...')
31
-
32
- # Convert files to docs + cleaning
33
- docs = convert_files_to_docs(dir_path=doc_dir,
34
- clean_func=clean_wiki_text,
35
- split_paragraphs=True)
36
-
37
- # %% Document Store
38
-
39
- print('---> Creating document store ...')
40
- document_store = FAISSDocumentStore(embedding_dim=128,
41
- faiss_index_factory_str="Flat",
42
- sql_url=f"sqlite:///{sql_file}")
43
-
44
-
45
-
46
- # %% Retriever (DPR)
47
-
48
- print('---> Initializing retriever ...')
49
- retriever = DensePassageRetriever(
50
- document_store=document_store,
51
- query_embedding_model="vblagoje/dpr-question_encoder-single-lfqa-wiki",
52
- passage_embedding_model="vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
53
- use_gpu=True
54
- )
55
-
56
- # %% Create Embeddings and save results
57
- document_store.update_embeddings(retriever)
58
-
59
- print('---> Saving results ...')
60
- # update db
61
- document_store.write_documents(docs)
62
- # save faiss file
63
- document_store.save(faiss_file)
64
-
65
- print('Done!')
66
-
67
-
68
- # %% ------------------------------------------- Main App
69
-
70
-
71
  # Sliders
72
  DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
73
  # Adjust to a question that you would like users to see in the search bar when they load the UI:
@@ -181,5 +122,64 @@ def main(pipe):
181
  st.write(st.session_state.results['answers'][0].meta['content'][i])
182
  st.markdown('---\n')
183
 
184
- pipe = prepare()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  main(pipe)
 
9
  sql_file = 'faiss_doc_store.db'
10
  faiss_file = 'faiss_index.faiss'
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Sliders
13
  DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
14
  # Adjust to a question that you would like users to see in the search bar when they load the UI:
 
122
  st.write(st.session_state.results['answers'][0].meta['content'][i])
123
  st.markdown('---\n')
124
 
125
+ # %% ------------------------------------------- Creating Doc store
126
+ # if not os.path.exists(sql_file) or not os.path.exists(faiss_file):
127
+ from haystack.document_stores import FAISSDocumentStore
128
+ from haystack.nodes import DensePassageRetriever
129
+ from haystack.utils import convert_files_to_docs, clean_wiki_text
130
+
131
+
132
+ module_dir = os.path.dirname(os.path.abspath(__file__))
133
+ os.chdir(module_dir)
134
+
135
+
136
+
137
+ # %% Download/Load Docs
138
+
139
+ # Get some files that we want to use
140
+ # s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt12.zip"
141
+ # fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
142
+
143
+ print('---> Loading Documents ...')
144
+
145
+ # Convert files to docs + cleaning
146
+ docs = convert_files_to_docs(dir_path=doc_dir,
147
+ clean_func=clean_wiki_text,
148
+ split_paragraphs=True)
149
+
150
+ # %% Document Store
151
+
152
+ print('---> Creating document store ...')
153
+ document_store = FAISSDocumentStore(embedding_dim=128,
154
+ faiss_index_factory_str="Flat",
155
+ sql_url=f"sqlite:///{sql_file}")
156
+
157
+
158
+
159
+ # %% Retriever (DPR)
160
+
161
+ print('---> Initializing retriever ...')
162
+ retriever = DensePassageRetriever(
163
+ document_store=document_store,
164
+ query_embedding_model="vblagoje/dpr-question_encoder-single-lfqa-wiki",
165
+ passage_embedding_model="vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
166
+ use_gpu=True
167
+ )
168
+
169
+ # %% Create Embeddings and save results
170
+ document_store.update_embeddings(retriever)
171
+
172
+ print('---> Saving results ...')
173
+ # update db
174
+ document_store.write_documents(docs)
175
+ # save faiss file
176
+ document_store.save(faiss_file)
177
+
178
+ print('Done!')
179
+
180
+
181
+ # %% ------------------------------------------- Main App
182
+
183
+ pipe = GenerativeQAPipeline(generator, retriever)
184
+ # pipe = prepare()
185
  main(pipe)