Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-# import packages
 import shutil
 import os
 __import__('pysqlite3')
@@ -7,7 +6,6 @@ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 from sentence_transformers import SentenceTransformer
 import chromadb
 from datasets import load_dataset
-# from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 from transformers import GPT2Tokenizer, GPT2Model
 
@@ -19,7 +17,6 @@ tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 # Load the model with from_tf=True
 model = GPT2Model.from_pretrained(model_name, from_tf=True)
 
-
 # Function to clear the cache
 def clear_cache(model_name):
     cache_dir = os.path.expanduser(f'~/.cache/torch/sentence_transformers/{model_name.replace("/", "_")}')
@@ -29,12 +26,10 @@ def clear_cache(model_name):
     else:
         print(f"No cache directory found for: {cache_dir}")
 
-
 # Embedding vector
 class VectorStore:
     def __init__(self, collection_name):
-
-        # Initialize the embedding model with try-except block for better error handling
+        # Initialize the embedding model
        try:
             self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
         except Exception as e:
@@ -46,11 +41,11 @@ class VectorStore:
     # Method to populate the vector store with embeddings from a dataset
     def populate_vectors(self, dataset, batch_size=100):
         # Use dataset streaming
-        dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train[:1500]')
+        dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train[:1500]', streaming=True)
 
-        # Process in batches
         texts = []
-
+        i = 0  # Initialize index
+        for example in dataset:
             title = example['title_cleaned']
             recipe = example['recipe_new']
             meal_type = example['meal_type']
@@ -66,6 +61,8 @@ class VectorStore:
                 self._process_batch(texts, i)
                 texts = []
 
+            i += 1  # Increment index
+
         # Process the remaining texts
         if texts:
             self._process_batch(texts, i)
@@ -79,24 +76,13 @@ class VectorStore:
         query_embeddings = self.embedding_model.encode(query).tolist()
         return self.collection.query(query_embeddings=query_embeddings, n_results=n_results)
 
-#
+# Create a vector embedding
 vector_store = VectorStore("embedding_vector")
 vector_store.populate_vectors(dataset=None)
 
-
-# Load the model and tokenizer
-# text generation model
-# model_name = "meta-llama/Meta-Llama-3-8B"
-# tokenizer = AutoTokenizer.from_pretrained(model_name)
-# model = AutoModelForCausalLM.from_pretrained(model_name)
-
-# load model orca-mini general purpose model
-# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
-# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
-
-
-
 # Define the chatbot response function
+conversation_history = []
+
 def chatbot_response(user_input):
     global conversation_history
     results = vector_store.search_context(user_input, n_results=1)
@@ -108,13 +94,11 @@ def chatbot_response(user_input):
     conversation_history.append(response)
     return response
 
-
 # Gradio interface
 def chat(user_input):
     response = chatbot_response(user_input)
     return response
+
 css = ".gradio-container {background: url(https://upload.wikimedia.org/wikipedia/commons/f/f5/Spring_Kitchen_Line-Up_%28Unsplash%29.jpg)}"
-iface = gr.Interface(fn=chat, inputs="text", outputs="text",css=css)
+iface = gr.Interface(fn=chat, inputs="text", outputs="text", css=css)
 iface.launch()
-
-
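The substantive fix is in populate_vectors: the previous revision read fields from example without ever opening a loop, so the method failed with a NameError as soon as it ran. Below is a minimal, runnable sketch of the batch-accumulation pattern the commit restores; the _process_batch here is a hypothetical stub standing in for the real method, which in app.py embeds the batch and writes it to the Chroma collection.

def _process_batch(texts, i):
    # stub: the real method encodes `texts` and adds them to the collection
    print(f"batch flushed at example {i}: {len(texts)} texts")

def populate_vectors_sketch(dataset, batch_size=100):
    texts = []
    i = 0  # running example index, also used to label batches
    for example in dataset:
        texts.append(f"{example['title_cleaned']} {example['recipe_new']}")
        if len(texts) >= batch_size:
            _process_batch(texts, i)  # flush a full batch
            texts = []
        i += 1
    if texts:
        _process_batch(texts, i)  # flush the final partial batch

populate_vectors_sketch(
    [{'title_cleaned': 'Pancakes', 'recipe_new': 'Mix and fry.'}] * 250,
    batch_size=100,
)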
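One caveat on the new load_dataset call: in many versions of the datasets library, sliced splits such as train[:1500] are rejected when streaming=True, because a streaming dataset has no known length to slice. If the Space still errors on that line, the usual streaming idiom is to stream the plain split and bound it lazily with take(). A hedged alternative, not what this commit does:

from datasets import load_dataset

# Assumption: the sliced split fails in streaming mode; stream the full
# split instead and cap it at 1500 examples with IterableDataset.take().
dataset = load_dataset(
    'Thefoodprocessor/recipe_new_with_features_full',
    split='train',
    streaming=True,
).take(1500)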
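The other likely source of the Space's runtime error is addressed by the added module-level conversation_history = []. Since chatbot_response declares global conversation_history and appends to it, the name must already be bound when the first request arrives; the global statement alone does not create the list, so without the new line the first call raises a NameError. A stripped-down illustration of the pattern (the function name is hypothetical):

conversation_history = []  # must exist before the first call

def chatbot_response_sketch(user_input):
    global conversation_history
    conversation_history.append(f"User: {user_input}")
    return conversation_history[-1]

print(chatbot_response_sketch("hello"))  # -> User: hello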