Update app.py
app.py CHANGED
@@ -78,7 +78,6 @@ documents = loader.load()
 text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
 texts = text_splitter.split_documents(documents)

-@spaces.GPU(duration=120)
 if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
     embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
     vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
@@ -87,7 +86,7 @@ if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
     embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
     vectordb = FAISS.load_local(CFG.Output_folder + '/faiss_index_ml_papers', embeddings, allow_dangerous_deserialization=True)

-
+@spaces.GPU
 def build_model(model_repo = CFG.model_name):
     tokenizer = AutoTokenizer.from_pretrained(model_repo)
     model = AutoModelForCausalLM.from_pretrained(model_repo, attn_implementation="flash_attention_2")
@@ -168,7 +167,7 @@ qa_chain = RetrievalQA.from_chain_type(
     verbose = False
 )

-@spaces.GPU
+@spaces.GPU
 def wrap_text_preserve_newlines(text, width=1500):
     # Split the input text into lines based on newline characters
     lines = text.split('\n')
@@ -181,7 +180,7 @@ def wrap_text_preserve_newlines(text, width=1500):

     return wrapped_text

-@spaces.GPU
+@spaces.GPU
 def process_llm_response(llm_response):
     ans = wrap_text_preserve_newlines(llm_response['result'])

@@ -204,7 +203,7 @@ def process_llm_response(llm_response):

     return ans.strip()

-@spaces.GPU
+@spaces.GPU
 def llm_ans(query):

     llm_response = qa_chain.invoke(query)
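For context on the decorator moves above: @spaces.GPU from the spaces package (Hugging Face ZeroGPU) has to wrap a callable, so it can decorate functions such as build_model or llm_ans but not a module-level block like the `if not os.path.exists(...)` index check, which is why that decorator is dropped in the first hunk. A minimal sketch of the intended pattern, with a placeholder model and function body rather than the ones in app.py:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_REPO = "gpt2"  # placeholder; app.py uses CFG.model_name

# Load on CPU at startup; ZeroGPU only grants a GPU inside decorated calls.
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForCausalLM.from_pretrained(MODEL_REPO)

@spaces.GPU(duration=120)  # the GPU is attached only while this call runs
def generate(prompt: str) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    m = model.to(device)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output = m.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(output[0], skip_special_tokens=True)

The optional duration argument extends the allocation window for longer calls; the plain @spaces.GPU form used on the other functions keeps the default.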
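The first two hunks keep the usual build-or-load pattern for the FAISS index: split and embed the documents on the first run, persist the index, and reload it afterwards. A minimal sketch of that pattern, assuming langchain-community style imports and using HuggingFaceEmbeddings in place of the Instruct variant to stay self-contained; the directory, chunk sizes, and embedding model are placeholders, not the CFG values:

import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

INDEX_DIR = "faiss_index"  # placeholder for CFG.Embeddings_path / CFG.Output_folder
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def get_vectordb(documents):
    """Build the index on the first run, otherwise reload the persisted copy."""
    if not os.path.exists(os.path.join(INDEX_DIR, "index.faiss")):
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = splitter.split_documents(documents)
        vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
        vectordb.save_local(INDEX_DIR)
    else:
        # Loading a pickled docstore requires opting in, as app.py does.
        vectordb = FAISS.load_local(INDEX_DIR, embeddings,
                                    allow_dangerous_deserialization=True)
    return vectordb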
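Finally, llm_ans in the last hunk simply calls qa_chain.invoke(query), and process_llm_response reads the 'result' field of the returned dict. A rough sketch of wiring such a chain to the vector store from the previous sketch, assuming a transformers pipeline wrapped for LangChain; the model and retriever settings are illustrative only:

from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Small placeholder model; app.py builds its own model with flash_attention_2.
generator = pipeline("text-generation", model="gpt2", max_new_tokens=128)
llm = HuggingFacePipeline(pipeline=generator)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_kwargs={"k": 2}),  # vectordb from the sketch above
    return_source_documents=True,
    verbose=False,
)

llm_response = qa_chain.invoke("What is this paper about?")
print(llm_response["result"])  # the text that process_llm_response() wraps and strips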