Spaces:

NavyaNayer
/

Moodify-Task

Sleeping

App Files Files Community

Moodify-Task / src /intent_graphs.py

NavyaNayer

Upload 25 files

267744b verified 6 months ago

raw

history blame

3.02 kB

	import torch
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
	from sklearn.preprocessing import label_binarize
	from transformers import BertTokenizer, BertForSequenceClassification
	from datasets import load_dataset

	# Check for CUDA
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Load dataset
	dataset = load_dataset("clinc_oos", "plus")
	label_names = dataset["train"].features["intent"].names # Ensure correct order

	# Load model
	num_labels = len(label_names)
	model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
	model.load_state_dict(torch.load("intent_classifier.pth", map_location=device))
	model.to(device)
	model.eval()

	# Load tokenizer
	tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

	# Prepare data
	true_labels = []
	pred_labels = []
	all_probs = []

	for example in dataset["test"]:
	sentence = example["text"]
	true_label = example["intent"]

	# Tokenize
	inputs = tokenizer(sentence, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
	inputs = {key: val.to(device) for key, val in inputs.items()}

	# Predict
	with torch.no_grad():
	outputs = model(**inputs)
	probs = torch.nn.functional.softmax(outputs.logits, dim=1).cpu().numpy()[0]
	predicted_class = np.argmax(probs)

	# Store results
	true_labels.append(true_label)
	pred_labels.append(predicted_class)
	all_probs.append(probs)

	# Convert to numpy arrays
	true_labels = np.array(true_labels)
	pred_labels = np.array(pred_labels)
	all_probs = np.array(all_probs)

	# Compute confusion matrix
	conf_matrix = confusion_matrix(true_labels, pred_labels)

	# Plot confusion matrix
	plt.figure(figsize=(12, 10))
	sns.heatmap(conf_matrix, annot=False, fmt="d", cmap="Blues")
	plt.xlabel("Predicted Label")
	plt.ylabel("True Label")
	plt.title("Confusion Matrix for Intent Classification")
	plt.savefig("confusion_matrix.png", dpi=300, bbox_inches="tight")
	plt.close()

	print("Confusion matrix saved as confusion_matrix.png")

	# --- Multi-Class Precision-Recall Curve ---
	# Binarize true labels for multi-class PR calculation
	true_labels_bin = label_binarize(true_labels, classes=np.arange(num_labels))

	# Plot Precision-Recall Curve for multiple classes
	plt.figure(figsize=(10, 8))
	for i in range(num_labels):
	precision, recall, _ = precision_recall_curve(true_labels_bin[:, i], all_probs[:, i])
	plt.plot(recall, precision, lw=1, alpha=0.7, label=f"Class {i}: {label_names[i]}")

	plt.xlabel("Recall")
	plt.ylabel("Precision")
	plt.title("Multi-Class Precision-Recall Curve")
	plt.legend(loc="best", fontsize=6, ncol=2, frameon=True)
	plt.grid(True)
	plt.savefig("precision_recall_curve.png", dpi=300, bbox_inches="tight")
	plt.close()

	print("Precision-Recall curve saved as precision_recall_curve.png")