Spaces:
Sleeping
Sleeping
File size: 5,834 Bytes
e428475 92b2ff7 e428475 92b2ff7 e428475 60c708b e428475 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import gradio as gr
import os
import torch
import numpy as np
import random
from huggingface_hub import login, HfFolder
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import logging
# Setup logging: timestamped INFO-level messages on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Set a seed for reproducibility across NumPy, the stdlib RNG and PyTorch.
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Login to Hugging Face.
# os.getenv returns None when "hf_token" is not set; only authenticate when a
# token is actually available so a missing secret does not abort start-up
# (None would otherwise be handed straight to the hub helpers).
token = os.getenv("hf_token")
if token:
    HfFolder.save_token(token)
    login(token)
else:
    logging.warning(
        "Environment variable 'hf_token' is not set; skipping Hugging Face login."
    )
# Model paths and quality mapping.
# Both tables are derived from one ordered list of quality domains so the
# repository ids and their display names cannot drift apart.
_HUB_NAMESPACE = "karths"
_MODEL_PREFIX = "binary_classification_train_"
_QUALITY_DOMAINS = (
    "test",
    "requirement",
    "process",
    "infrastructure",
    "documentation",
    "design",
    "defect",
    "code",
    "build",
    "automation",
    "people",
    "architecture",
)

# Hub repository ids, one binary classifier per quality domain.
model_paths = [
    f"{_HUB_NAMESPACE}/{_MODEL_PREFIX}{domain}" for domain in _QUALITY_DOMAINS
]

# Repository name (without namespace) -> human-readable quality label.
quality_mapping = {
    f"{_MODEL_PREFIX}{domain}": domain.capitalize() for domain in _QUALITY_DOMAINS
}
# Pre-load models and tokenizer once at import time so requests never pay the
# loading cost. A single tokenizer is shared by every classifier.
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")

# Repository id -> loaded sequence-classification model, in model_paths order.
models = dict(
    zip(
        model_paths,
        map(AutoModelForSequenceClassification.from_pretrained, model_paths),
    )
)
def get_quality_name(model_name):
    """Return the display label for a model repository id.

    The part after the last ``/`` in *model_name* is looked up in
    ``quality_mapping``; ``"Unknown Quality"`` is returned when it is absent.
    """
    repo_name = model_name.rsplit('/', 1)[-1]
    if repo_name in quality_mapping:
        return quality_mapping[repo_name]
    return "Unknown Quality"
def model_prediction(model, text, device):
    """Score *text* with one classifier and return its class-1 probability.

    The model is moved to *device* and switched to eval mode, the text is
    tokenized with the module-level ``tokenizer`` (truncated to 512 tokens),
    and the logits are turned into probabilities with a softmax over axis 1.

    Returns the mean of column 1 of the probability matrix.
    NOTE(review): column 1 is presumably the "belongs to this quality" class;
    confirm against the models' label mapping.
    """
    model.to(device)
    model.eval()

    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Input tensors must live on the same device as the model.
    on_device = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**on_device).logits

    class_probs = softmax(logits.cpu().numpy(), axis=1)
    return np.mean(class_probs[:, 1])
def main_interface(text):
    """Classify an issue description and return the HTML for the top qualities.

    Every pre-loaded model scores *text*; qualities whose probability is at
    least 0.90 are kept, sorted by probability (highest first) and the best
    three are rendered with ``render_html_output``.

    Args:
        text: The issue description typed by the user. ``None`` is treated
            like an empty string.

    Returns:
        A ``(html, "")`` tuple. ``html`` is either the rendered predictions
        or a red error message when the input is empty, shorter than 30
        characters after trimming, or no model reaches the threshold. The
        second element is always the empty string.
    """
    min_chars = 30      # minimum length of the trimmed description
    threshold = 0.90    # minimum probability for a quality to be recommended
    max_results = 3     # number of qualities shown to the user

    # Validate on the trimmed text so whitespace padding cannot satisfy the
    # minimum-length rule; `or ""` guards against a None input.
    cleaned = (text or "").strip()
    if not cleaned:
        return "<div style='color: red;'>No text provided. Please enter a valid issue description.</div>", ""
    # Reject descriptions that are too short to classify meaningfully.
    if len(cleaned) < min_chars:
        return "<div style='color: red;'>Text is less than 30 characters.</div>", ""

    device = "cuda" if torch.cuda.is_available() else "cpu"
    results = []
    for model_path, model in models.items():
        quality_name = get_quality_name(model_path)
        # The original (untrimmed) text is scored so predictions are unchanged.
        avg_prob = model_prediction(model, text, device)
        logging.info(
            "Model: %s, Quality: %s, Average Probability: %.3f",
            model_path,
            quality_name,
            avg_prob,
        )
        if avg_prob >= threshold:  # Only consider probabilities >= 0.90
            results.append((quality_name, avg_prob))

    if not results:  # If no results meet the criteria
        return "<div style='color: red;'>No recommendation. Prediction probability is below the threshold. </div>", ""

    top_qualities = sorted(results, key=lambda item: item[1], reverse=True)[:max_results]
    output_html = render_html_output(top_qualities)
    return output_html, ""
def render_html_output(top_qualities):
    """Render ranked quality predictions as an HTML fragment.

    Args:
        top_qualities: Sequence of ``(quality_name, probability)`` pairs,
            already sorted best-first. Only the first three are rendered and
            the probability is not displayed.

    Returns:
        A string made of a ``<style>`` block followed by one
        ``quality-container`` ``<div>`` per rendered prediction. With no
        predictions, only the ``<style>`` block is returned.
    """
    # The literal is kept flush-left so the emitted markup stays unchanged.
    styles = """
<style>
.quality-container {
font-family: Arial, sans-serif;
text-align: center;
margin-top: 20px;
}
.quality-label, .ranking {
display: inline-block;
padding: 0.5em 1em;
font-size: 18px;
font-weight: bold;
color: white;
background-color: #007bff;
border-radius: 0.5rem;
margin-right: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
}
.probability {
display: block;
margin-top: 10px;
font-size: 16px;
color: #007bff;
}
</style>
"""
    ranking_labels = ('Top 1 Prediction', 'Top 2 Prediction', 'Top 3 Prediction')
    card_template = (
        '\n<div class="quality-container">\n'
        '<span class="ranking">{rank}</span>\n'
        '<span class="quality-label">{quality}</span>\n'
        '</div>\n'
    )
    # zip stops at the shorter input, which caps the output at three cards.
    cards = [
        card_template.format(rank=rank, quality=quality)
        for rank, (quality, _prob) in zip(ranking_labels, top_qualities)
    ]
    return styles + "".join(cards)
# Sample input offered beneath the text box (one row per example, one column
# per input component).
example_texts = [
    ["Issues with newer operating systems. The application fails to start or crashes shortly after launch, likely due to deprecated libraries.\n\nEnvironment: Desktop app version 1.8, Windows 11\nReproduction: Install on a system running Windows 11, attempt to launch the application."]
]

# Single-textbox UI: the HTML component shows the predictions, while the
# second (hidden) textbox receives the empty string main_interface returns.
interface = gr.Interface(
    fn=main_interface,
    inputs=gr.Textbox(lines=7, label="Issue Description", placeholder="Enter your issue text here"),
    outputs=[gr.HTML(label="Prediction Output"), gr.Textbox(label="Predictions", visible=False)],
    title="QualityTagger",
    # The description names domains that the configured models actually cover.
    description="This tool classifies text into different quality domains such as Test, Requirement, Design, etc.",
    examples=example_texts,
)

interface.launch(share=True)