NavyaNayer committed
Commit 20ccf8e · verified · 1 Parent(s): 79173f1

Delete intent_graphs.py

Files changed (1)
  1. intent_graphs.py +0 -88
intent_graphs.py DELETED
@@ -1,88 +0,0 @@
- import torch
- import numpy as np
- import matplotlib.pyplot as plt
- import seaborn as sns
- from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
- from sklearn.preprocessing import label_binarize
- from transformers import BertTokenizer, BertForSequenceClassification
- from datasets import load_dataset
-
- # Check for CUDA
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # Load dataset
- dataset = load_dataset("clinc_oos", "plus")
- label_names = dataset["train"].features["intent"].names  # Ensure correct order
-
- # Load model
- num_labels = len(label_names)
- model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
- model.load_state_dict(torch.load("intent_classifier.pth", map_location=device))
- model.to(device)
- model.eval()
-
- # Load tokenizer
- tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-
- # Prepare data
- true_labels = []
- pred_labels = []
- all_probs = []
-
- for example in dataset["test"]:
-     sentence = example["text"]
-     true_label = example["intent"]
-
-     # Tokenize
-     inputs = tokenizer(sentence, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
-     inputs = {key: val.to(device) for key, val in inputs.items()}
-
-     # Predict
-     with torch.no_grad():
-         outputs = model(**inputs)
-         probs = torch.nn.functional.softmax(outputs.logits, dim=1).cpu().numpy()[0]
-         predicted_class = np.argmax(probs)
-
-     # Store results
-     true_labels.append(true_label)
-     pred_labels.append(predicted_class)
-     all_probs.append(probs)
-
- # Convert to numpy arrays
- true_labels = np.array(true_labels)
- pred_labels = np.array(pred_labels)
- all_probs = np.array(all_probs)
-
- # Compute confusion matrix
- conf_matrix = confusion_matrix(true_labels, pred_labels)
-
- # Plot confusion matrix
- plt.figure(figsize=(12, 10))
- sns.heatmap(conf_matrix, annot=False, fmt="d", cmap="Blues")
- plt.xlabel("Predicted Label")
- plt.ylabel("True Label")
- plt.title("Confusion Matrix for Intent Classification")
- plt.savefig("confusion_matrix.png", dpi=300, bbox_inches="tight")
- plt.close()
-
- print("Confusion matrix saved as confusion_matrix.png")
-
- # --- Multi-Class Precision-Recall Curve ---
- # Binarize true labels for multi-class PR calculation
- true_labels_bin = label_binarize(true_labels, classes=np.arange(num_labels))
-
- # Plot Precision-Recall Curve for multiple classes
- plt.figure(figsize=(10, 8))
- for i in range(num_labels):
-     precision, recall, _ = precision_recall_curve(true_labels_bin[:, i], all_probs[:, i])
-     plt.plot(recall, precision, lw=1, alpha=0.7, label=f"Class {i}: {label_names[i]}")
-
- plt.xlabel("Recall")
- plt.ylabel("Precision")
- plt.title("Multi-Class Precision-Recall Curve")
- plt.legend(loc="best", fontsize=6, ncol=2, frameon=True)
- plt.grid(True)
- plt.savefig("precision_recall_curve.png", dpi=300, bbox_inches="tight")
- plt.close()
-
- print("Precision-Recall curve saved as precision_recall_curve.png")