Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ from sentence_transformers import SentenceTransformer
|
|
| 7 |
import chromadb
|
| 8 |
from datasets import load_dataset
|
| 9 |
import gradio as gr
|
| 10 |
-
from transformers import GPT2Tokenizer,
|
| 11 |
|
| 12 |
model_name = "Amitesh007/text_generation-finetuned-gpt2"
|
| 13 |
|
|
@@ -15,7 +15,7 @@ model_name = "Amitesh007/text_generation-finetuned-gpt2"
|
|
| 15 |
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
|
| 16 |
|
| 17 |
# Load the model with from_tf=True
|
| 18 |
-
model =
|
| 19 |
|
| 20 |
# Function to clear the cache
|
| 21 |
def clear_cache(model_name):
|
|
@@ -80,6 +80,41 @@ class VectorStore:
|
|
| 80 |
vector_store = VectorStore("embedding_vector")
|
| 81 |
vector_store.populate_vectors(dataset=None)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
# Define the chatbot response function
|
| 84 |
conversation_history = []
|
| 85 |
|
|
|
|
| 7 |
import chromadb
|
| 8 |
from datasets import load_dataset
|
| 9 |
import gradio as gr
|
| 10 |
+
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
|
| 11 |
|
| 12 |
model_name = "Amitesh007/text_generation-finetuned-gpt2"
|
| 13 |
|
|
|
|
| 15 |
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
|
| 16 |
|
| 17 |
# Load the model with from_tf=True
|
| 18 |
+
model = GPT2LMHeadModel.from_pretrained(model_name, from_tf=True)
|
| 19 |
|
| 20 |
# Function to clear the cache
|
| 21 |
def clear_cache(model_name):
|
|
|
|
| 80 |
vector_store = VectorStore("embedding_vector")
|
| 81 |
vector_store.populate_vectors(dataset=None)
|
| 82 |
|
| 83 |
+
# Fine-tuning function
|
| 84 |
+
def fine_tune_model():
    """Fine-tune the module-level ``model`` on a slice of the recipe dataset.

    Loads the first 1500 rows of the ``train`` split of
    ``Thefoodprocessor/recipe_new_with_features_full``, joins each row's
    ``title_cleaned`` and ``recipe_new`` fields into a single text, tokenizes
    it with the module-level ``tokenizer`` and runs a causal-language-model
    training loop with ``Trainer``.

    Side effects:
        * Updates the weights of the module-level ``model`` in place.
        * Sets ``tokenizer.pad_token`` to the EOS token when the tokenizer
          has no padding token configured.
        * Writes training output under ``./results``.

    Returns:
        None.
    """
    # Imported locally so this function does not depend on the file-level
    # ``from transformers import ...`` line listing the collator.
    from transformers import DataCollatorForLanguageModeling

    # Load your dataset
    dataset = load_dataset(
        'Thefoodprocessor/recipe_new_with_features_full',
        split='train[:1500]',
        streaming=False,
    )

    # Padding a batch requires a pad token; fall back to EOS when the
    # tokenizer does not define one, otherwise padding raises.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Prepare the data for training
    def tokenize_function(examples):
        # With batched=True every column arrives as a list of values, so the
        # title and recipe must be joined row by row, not list + string.
        texts = [
            title + " " + recipe
            for title, recipe in zip(examples['title_cleaned'], examples['recipe_new'])
        ]
        # Only truncate here; the data collator pads each batch to its own
        # longest sequence instead of padding every row to the maximum length.
        return tokenizer(texts, truncation=True)

    # Drop the raw columns so the collator only sees tokenizer outputs.
    tokenized_datasets = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset.column_names,
    )

    # mlm=False makes the collator copy input_ids into labels (ignoring pad
    # positions), which is what the LM head needs to return a loss.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Define training arguments
    # No evaluation settings: there is no eval dataset, and requesting
    # per-epoch evaluation without one makes Trainer fail.
    training_args = TrainingArguments(
        output_dir="./results",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
    )

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets,
        data_collator=data_collator,
    )

    # Train the model
    trainer.train()
|
| 114 |
+
|
| 115 |
+
# Fine-tune the model
|
| 116 |
+
fine_tune_model()
|
| 117 |
+
|
| 118 |
# Define the chatbot response function
|
| 119 |
conversation_history = []
|
| 120 |
|