user committed
Commit becd78e · 1 Parent(s): fe293b8
Implement data persistence for improved performance and reusability
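In short: the processed text chunks, their embeddings, and the FAISS index are now written to disk under data/, keyed by the selected model combination, and reloaded on later runs instead of being recomputed. A minimal round-trip sketch of that persistence scheme, using the same file layout as the diff below (the two dummy chunks, the 384-dimensional embeddings, and the "demo" key are placeholders for illustration; the app keys its files by st.session_state.model_combination):

import os
import pickle

import faiss
import numpy as np

# Placeholder data standing in for the app's chunks / embeddings / index.
model_combination = "demo"
chunks = ["first chunk", "second chunk"]
embeddings = np.random.rand(2, 384).astype("float32")
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Persist all three artifacts, one set of files per model combination.
os.makedirs("data", exist_ok=True)
with open(f"data/chunks_{model_combination}.pkl", "wb") as f:
    pickle.dump(chunks, f)
np.save(f"data/embeddings_{model_combination}.npy", embeddings)
faiss.write_index(index, f"data/faiss_index_{model_combination}.bin")

# Reload and confirm the artifacts survive the trip to disk.
with open(f"data/chunks_{model_combination}.pkl", "rb") as f:
    assert pickle.load(f) == chunks
assert np.allclose(np.load(f"data/embeddings_{model_combination}.npy"), embeddings)
assert faiss.read_index(f"data/faiss_index_{model_combination}.bin").ntotal == index.ntotal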
app.py
CHANGED
@@ -43,12 +43,16 @@ MODEL_COMBINATIONS = {
 }
 
 @st.cache_resource
-def load_models(
+def load_models(combination):
     try:
-
-
-
-
+        embedding_model_name = MODEL_COMBINATIONS[combination]["embedding"]
+        generation_model_name = MODEL_COMBINATIONS[combination]["generation"]
+
+        embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
+        embedding_model = AutoModel.from_pretrained(embedding_model_name)
+        generation_tokenizer = AutoTokenizer.from_pretrained(generation_model_name)
+        generation_model = AutoModelForCausalLM.from_pretrained(generation_model_name)
+
         return embedding_tokenizer, embedding_model, generation_tokenizer, generation_model
     except Exception as e:
         st.error(f"Error loading models: {str(e)}")
@@ -81,8 +85,8 @@ def create_faiss_index(embeddings):
     index.add(embeddings)
     return index
 
-def generate_response(query,
-    inputs =
+def generate_response(query, embedding_tokenizer, generation_tokenizer, generation_model, embedding_model, index, chunks):
+    inputs = embedding_tokenizer(query, return_tensors="pt", padding=True, truncation=True, max_length=512)
     with torch.no_grad():
         outputs = embedding_model(**inputs)
     query_embedding = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
@@ -94,25 +98,28 @@ def generate_response(query, tokenizer, generation_model, embedding_model, index
 
     prompt = f"As the Muse of A.R. Ammons, respond to this query: {query}\nContext: {context}\nMuse:"
 
-    input_ids =
+    input_ids = generation_tokenizer.encode(prompt, return_tensors="pt")
     output = generation_model.generate(input_ids, max_new_tokens=100, num_return_sequences=1, temperature=0.7)
-    response =
+    response = generation_tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
 
     muse_response = response.split("Muse:")[-1].strip()
     return muse_response
 
-def save_data(chunks, embeddings, index):
-
+def save_data(chunks, embeddings, index, model_combination):
+    os.makedirs('data', exist_ok=True)
+    with open(f'data/chunks_{model_combination}.pkl', 'wb') as f:
         pickle.dump(chunks, f)
-    np.save('
-    faiss.write_index(index, '
-
-def load_data():
-    if os.path.exists('
-
+    np.save(f'data/embeddings_{model_combination}.npy', embeddings)
+    faiss.write_index(index, f'data/faiss_index_{model_combination}.bin')
+
+def load_data(model_combination):
+    if os.path.exists(f'data/chunks_{model_combination}.pkl') and \
+       os.path.exists(f'data/embeddings_{model_combination}.npy') and \
+       os.path.exists(f'data/faiss_index_{model_combination}.bin'):
+        with open(f'data/chunks_{model_combination}.pkl', 'rb') as f:
            chunks = pickle.load(f)
-    embeddings = np.load('
-    index = faiss.read_index('
+        embeddings = np.load(f'data/embeddings_{model_combination}.npy')
+        index = faiss.read_index(f'data/faiss_index_{model_combination}.bin')
        return chunks, embeddings, index
    return None, None, None
 
@@ -167,12 +174,22 @@ st.info(f"Potential time saved compared to slowest option: {MODEL_COMBINATIONS[s
 if st.button("Load Selected Models"):
     with st.spinner("Loading models and data..."):
         embedding_tokenizer, embedding_model, generation_tokenizer, generation_model = load_models(st.session_state.model_combination)
-
-
-        index =
+
+        # Try to load existing data
+        chunks, embeddings, index = load_data(st.session_state.model_combination)
+
+        # If data doesn't exist, process it and save
+        if chunks is None or embeddings is None or index is None:
+            chunks = load_and_process_text('ammons_muse.txt')
+            embeddings = create_embeddings(chunks, embedding_model)
+            index = create_faiss_index(embeddings)
+            save_data(chunks, embeddings, index, st.session_state.model_combination)
 
         st.session_state.models_loaded = True
-        st.
+        st.session_state.chunks = chunks
+        st.session_state.embeddings = embeddings
+        st.session_state.index = index
+        st.success("Models and data loaded successfully!")
 
 if 'models_loaded' not in st.session_state or not st.session_state.models_loaded:
     st.warning("Please load the models before chatting.")
@@ -194,7 +211,7 @@ if prompt := st.chat_input("What would you like to ask the Muse?"):
 
     with st.spinner("The Muse is contemplating..."):
         try:
-            response = generate_response(prompt,
+            response = generate_response(prompt, embedding_tokenizer, generation_tokenizer, generation_model, embedding_model, st.session_state.index, st.session_state.chunks)
         except Exception as e:
             response = f"I apologize, but I encountered an error: {str(e)}"
 
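A side note on the load_models change: st.cache_resource caches its return value per distinct argument, so each model combination is downloaded and instantiated at most once per server process, and switching back to an already-loaded combination is effectively free. A stripped-down sketch of that behavior (the print statement and the string keys are illustrative only, not code from app.py):

import streamlit as st

@st.cache_resource
def load_models(combination):
    print(f"loading {combination}")  # executes only on a cache miss
    return combination

load_models("fast")     # first call: prints "loading fast"
load_models("fast")     # cache hit: nothing printed, same object returned
load_models("quality")  # new argument: prints "loading quality"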
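The retrieval step that turns query_embedding into the context string used in the prompt sits between the hunks above and is unchanged by this commit, so it does not appear in the diff. A hedged sketch of how the cached FAISS index is typically queried at that point (the k value, the helper name retrieve_context, and the newline join are assumptions, not code from app.py):

import numpy as np

def retrieve_context(query_embedding, index, chunks, k=3):
    # FAISS expects a 2-D float32 array of shape (n_queries, dim).
    query = np.asarray(query_embedding, dtype="float32").reshape(1, -1)
    distances, indices = index.search(query, k)
    # Map nearest-neighbour row ids back to the original text chunks; -1 marks an empty slot.
    return "\n".join(chunks[i] for i in indices[0] if i != -1)

With something like this in place, generate_response only needs the tokenizers, models, index, and chunks that the load button now stashes in st.session_state.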