Update app.py
app.py CHANGED
@@ -78,7 +78,6 @@ documents = loader.load()
 text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
 texts = text_splitter.split_documents(documents)

-@spaces.GPU(duration=120)
 if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
     embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
     vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
@@ -87,7 +86,7 @@ if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
     embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
     vectordb = FAISS.load_local(CFG.Output_folder + '/faiss_index_ml_papers', embeddings, allow_dangerous_deserialization=True)

-
+@spaces.GPU
 def build_model(model_repo = CFG.model_name):
     tokenizer = AutoTokenizer.from_pretrained(model_repo)
     model = AutoModelForCausalLM.from_pretrained(model_repo, attn_implementation="flash_attention_2")
@@ -168,7 +167,7 @@ qa_chain = RetrievalQA.from_chain_type(
     verbose = False
 )

-@spaces.GPU
+@spaces.GPU
 def wrap_text_preserve_newlines(text, width=1500):
     # Split the input text into lines based on newline characters
     lines = text.split('\n')
@@ -181,7 +180,7 @@ def wrap_text_preserve_newlines(text, width=1500):

     return wrapped_text

-@spaces.GPU
+@spaces.GPU
 def process_llm_response(llm_response):
     ans = wrap_text_preserve_newlines(llm_response['result'])

@@ -204,7 +203,7 @@ def process_llm_response(llm_response):

     return ans.strip()

-@spaces.GPU
+@spaces.GPU
 def llm_ans(query):

     llm_response = qa_chain.invoke(query)
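For context on the decorator moves above: @spaces.GPU from the spaces package (Hugging Face ZeroGPU) has to wrap a callable, so it can decorate functions such as build_model or llm_ans but not a module-level block like the `if not os.path.exists(...)` index check, which is why that decorator is dropped in the first hunk. A minimal sketch of the intended pattern, with a placeholder model and function body rather than the ones in app.py:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_REPO = "gpt2"  # placeholder; app.py uses CFG.model_name

# Load on CPU at startup; ZeroGPU only grants a GPU inside decorated calls.
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForCausalLM.from_pretrained(MODEL_REPO)

@spaces.GPU(duration=120)  # the GPU is attached only while this call runs
def generate(prompt: str) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    m = model.to(device)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output = m.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(output[0], skip_special_tokens=True)

The optional duration argument extends the allocation window for longer calls; the plain @spaces.GPU form used on the other functions keeps the default.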
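The first two hunks keep the usual build-or-load pattern for the FAISS index: split and embed the documents on the first run, persist the index, and reload it afterwards. A minimal sketch of that pattern, assuming langchain-community style imports and using HuggingFaceEmbeddings in place of the Instruct variant to stay self-contained; the directory, chunk sizes, and embedding model are placeholders, not the CFG values:

import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

INDEX_DIR = "faiss_index"  # placeholder for CFG.Embeddings_path / CFG.Output_folder
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def get_vectordb(documents):
    """Build the index on the first run, otherwise reload the persisted copy."""
    if not os.path.exists(os.path.join(INDEX_DIR, "index.faiss")):
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = splitter.split_documents(documents)
        vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
        vectordb.save_local(INDEX_DIR)
    else:
        # Loading a pickled docstore requires opting in, as app.py does.
        vectordb = FAISS.load_local(INDEX_DIR, embeddings,
                                    allow_dangerous_deserialization=True)
    return vectordb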
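Finally, llm_ans in the last hunk simply calls qa_chain.invoke(query), and process_llm_response reads the 'result' field of the returned dict. A rough sketch of wiring such a chain to the vector store from the previous sketch, assuming a transformers pipeline wrapped for LangChain; the model and retriever settings are illustrative only:

from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Small placeholder model; app.py builds its own model with flash_attention_2.
generator = pipeline("text-generation", model="gpt2", max_new_tokens=128)
llm = HuggingFacePipeline(pipeline=generator)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_kwargs={"k": 2}),  # vectordb from the sketch above
    return_source_documents=True,
    verbose=False,
)

llm_response = qa_chain.invoke("What is this paper about?")
print(llm_response["result"])  # the text that process_llm_response() wraps and strips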