|
import tensorflow as tf |
|
from chatbot_model import RetrievalChatbot, ChatbotConfig |
|
from environment_setup import EnvironmentSetup |
|
from response_quality_checker import ResponseQualityChecker |
|
from chatbot_validator import ChatbotValidator |
|
from training_plotter import TrainingPlotter |
|
|
|
|
|
from logger_config import config_logger |
|
logger = config_logger(__name__) |
|
|
|
def run_interactive_chat(chatbot, quality_checker):
    """Run a blocking terminal chat loop until the user quits.

    Args:
        chatbot: Object exposing ``chat(query, conversation_history,
            quality_checker, top_k)`` returning ``(response, candidates,
            metrics)`` — presumably RetrievalChatbot; verify against caller.
        quality_checker: Passed through to ``chatbot.chat`` for response
            scoring.
    """
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C should end the session cleanly instead of
            # dumping a traceback onto the terminal.
            print("\nAssistant: Goodbye!")
            break

        if user_input.lower() in ['quit', 'exit', 'bye']:
            print("Assistant: Goodbye!")
            break

        response, candidates, metrics = chatbot.chat(
            query=user_input,
            conversation_history=None,
            quality_checker=quality_checker,
            top_k=5
        )

        print(f"Assistant: {response}")

        # Show runner-up candidates (indices 1..3) only when the model
        # reports confidence; candidates are (response, score) pairs.
        if metrics.get('is_confident', False):
            print("\nAlternative responses:")
            for resp, score in candidates[1:4]:
                print(f"Score: {score:.4f} - {resp}")
|
|
|
def inspect_tfrecord(tfrecord_file_path, num_examples=3, max_length=512,
                     num_negatives=3):
    """Print the first few parsed examples from a training TFRecord file.

    Args:
        tfrecord_file_path: Path to the TFRecord file to inspect.
        num_examples: How many examples to print (default 3).
        max_length: Padded token-ID sequence length per record (default
            512; must match the writer's max_context_token_limit).
        num_negatives: Hard negatives stored per example (default 3);
            'negative_ids' is stored flattened as num_negatives * max_length.
    """
    def parse_example(example_proto):
        # Schema must mirror the writer's serialization exactly, otherwise
        # parse_single_example raises an InvalidArgumentError.
        feature_description = {
            'query_ids': tf.io.FixedLenFeature([max_length], tf.int64),
            'positive_ids': tf.io.FixedLenFeature([max_length], tf.int64),
            'negative_ids': tf.io.FixedLenFeature(
                [num_negatives * max_length], tf.int64),
        }
        return tf.io.parse_single_example(example_proto, feature_description)

    dataset = tf.data.TFRecordDataset(tfrecord_file_path)
    dataset = dataset.map(parse_example)

    for i, example in enumerate(dataset.take(num_examples)):
        print(f"Example {i+1}:")
        print(f"Query IDs: {example['query_ids'].numpy()}")
        print(f"Positive IDs: {example['positive_ids'].numpy()}")
        print(f"Negative IDs: {example['negative_ids'].numpy()}")
        print("-" * 50)
|
|
|
def main():
    """End-to-end training driver.

    Sets up the environment, trains the retrieval chatbot from a TFRecord
    file, saves the final model, runs validation, plots metrics, and then
    drops into an interactive chat session.
    """
    # Start from a clean Keras/TF session state.
    tf.keras.backend.clear_session()

    environment = EnvironmentSetup()
    environment.initialize()

    # Training hyperparameters.
    num_epochs = 20
    tfrecord_path = 'training_data/training_data.tfrecord'
    train_batch_size = environment.optimize_batch_size(base_batch_size=16)

    model_config = ChatbotConfig(
        embedding_dim=768,
        max_context_token_limit=512,
        freeze_embeddings=False,
    )

    bot = RetrievalChatbot(model_config, mode='training')
    bot.build_models()

    # NOTE(review): the bot is constructed with mode='training', so this
    # branch only fires if the constructor itself switches the mode to
    # 'preparation' — confirm against RetrievalChatbot.__init__.
    if bot.mode == 'preparation':
        bot.verify_faiss_index()

    bot.train_streaming(
        tfrecord_file_path=tfrecord_path,
        epochs=num_epochs,
        batch_size=train_batch_size,
        use_lr_schedule=True,
    )

    # Persist the trained model under the training base directory.
    final_model_dir = environment.training_dirs['base'] / 'final_model'
    bot.save_models(final_model_dir)

    # Validate response quality and log the resulting metrics.
    checker = ResponseQualityChecker(chatbot=bot)
    validation_metrics = ChatbotValidator(bot, checker).run_validation(
        num_examples=5)
    logger.info(f"Validation Metrics: {validation_metrics}")

    # Plot training history and validation results.
    plots = TrainingPlotter(save_dir=environment.training_dirs['plots'])
    plots.plot_training_history(bot.history)
    plots.plot_validation_metrics(validation_metrics)

    logger.info("\nStarting interactive chat session...")
    run_interactive_chat(bot, checker)
|
# Script entry point: run the full train/validate/chat pipeline.
if __name__ == "__main__":

    main()