Spaces:
Sleeping
Sleeping
File size: 7,804 Bytes
e428475 92b2ff7 e428475 92b2ff7 e428475 b86b291 de23917 b86b291 de23917 b86b291 e428475 b86b291 e428475 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
import gradio as gr
import os
import torch
import numpy as np
import random
from huggingface_hub import login, HfFolder
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import logging
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Seed every random number generator used by the app for reproducibility.
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Login to Hugging Face. The token comes from the "hf_token" environment
# variable; os.getenv returns None when it is unset, so only authenticate
# when a non-empty token is actually available.
token = os.getenv("hf_token")
if token:
    HfFolder.save_token(token)
    login(token)
else:
    logging.warning("Environment variable 'hf_token' is not set; skipping Hugging Face login.")
# Hub repository ids, one binary classifier per quality domain.
model_paths = [
    "karths/binary_classification_train_test",
    "karths/binary_classification_train_requirement",
    "karths/binary_classification_train_process",
    "karths/binary_classification_train_infrastructure",
    "karths/binary_classification_train_documentation",
    "karths/binary_classification_train_design",
    "karths/binary_classification_train_defect",
    "karths/binary_classification_train_code",
    "karths/binary_classification_train_build",
    "karths/binary_classification_train_automation",
    "karths/binary_classification_train_people",
    "karths/binary_classification_train_architecture",
]

# Repository name (the id without its owner prefix) -> label shown to the user.
quality_mapping = dict(
    binary_classification_train_test="Test",
    binary_classification_train_requirement="Requirement",
    binary_classification_train_process="Process",
    binary_classification_train_infrastructure="Infrastructure",
    binary_classification_train_documentation="Documentation",
    binary_classification_train_design="Design",
    binary_classification_train_defect="Defect",
    binary_classification_train_code="Code",
    binary_classification_train_build="Build",
    binary_classification_train_automation="Automation",
    binary_classification_train_people="People",
    binary_classification_train_architecture="Architecture",
)

# Pre-load the tokenizer and every model once at import time so requests
# do not pay the loading cost. The same tokenizer is used with all models.
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
models = {
    repo_id: AutoModelForSequenceClassification.from_pretrained(repo_id)
    for repo_id in model_paths
}
def get_quality_name(model_name):
    """Return the display label for a model repository id.

    Only the text after the last "/" in *model_name* is looked up in
    ``quality_mapping``; names without an entry yield "Unknown Quality".
    """
    repo_name = model_name.rpartition('/')[2]
    if repo_name in quality_mapping:
        return quality_mapping[repo_name]
    return "Unknown Quality"
def model_prediction(model, text, device):
    """Score *text* with one classifier and return its index-1 probability.

    The text is tokenized with the module-level ``tokenizer`` (truncated to
    at most 512 tokens), run through *model* on *device* without gradient
    tracking, and the logits are turned into probabilities with a softmax
    over axis 1. The mean of column 1 across the batch is returned.
    """
    model.to(device)
    model.eval()

    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Move every tokenizer output tensor to the same device as the model.
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**batch).logits

    class_probs = softmax(logits.cpu().numpy(), axis=1)
    return np.mean(class_probs[:, 1])
def main_interface(text):
    """Classify an issue description and render the best-matching qualities.

    Every pre-loaded model scores the text. Qualities whose probability is
    at least 0.90 are kept, and the three highest are rendered as HTML.

    Args:
        text: Issue description entered by the user. ``None`` is treated
            like empty input.

    Returns:
        A ``(html, "")`` tuple. ``html`` is either a red notice (no text,
        text too short, nothing above the threshold) or the rendered
        predictions. The second element is always an empty string.
    """
    min_chars = 30
    min_probability = 0.90
    max_results = 3

    # Treat None like blank input instead of failing on None.strip().
    stripped = text.strip() if text else ""
    if not stripped:
        return "<div style='color: red;'>No text provided. Please enter a valid issue description.</div>", ""
    # Enforce the minimum length on the stripped text so that surrounding
    # whitespace cannot pad a too-short description past the check.
    if len(stripped) < min_chars:
        return "<div style='color: red;'>Text is less than 30 characters.</div>", ""

    device = "cuda" if torch.cuda.is_available() else "cpu"
    results = []
    for model_path, model in models.items():
        quality_name = get_quality_name(model_path)
        avg_prob = model_prediction(model, text, device)
        # Log every model's score, including the ones that are filtered out.
        logging.info(
            "Model: %s, Quality: %s, Average Probability: %.3f",
            model_path, quality_name, avg_prob,
        )
        if avg_prob >= min_probability:
            results.append((quality_name, avg_prob))

    if not results:  # No model reached the probability threshold.
        return "<div style='color: red;'>No recommendation. Prediction probability is below the threshold. </div>", ""

    # Highest probability first; keep only the top few for display.
    top_qualities = sorted(results, key=lambda pair: pair[1], reverse=True)[:max_results]
    output_html = render_html_output(top_qualities)
    return output_html, ""
def render_html_output(top_qualities):
    """Render ranked quality predictions as a styled HTML fragment.

    Args:
        top_qualities: Iterable of ``(quality_name, probability)`` pairs,
            best first. At most the first three are rendered; the
            probability is not displayed.

    Returns:
        A string holding a ``<style>`` block followed by one
        ``quality-container`` div per rendered prediction. For an empty
        input only the ``<style>`` block is returned.
    """
    styles = """
<style>
.quality-container {
font-family: Arial, sans-serif;
text-align: center;
margin-top: 20px;
}
.quality-label, .ranking {
display: inline-block;
padding: 0.5em 1em;
font-size: 18px;
font-weight: bold;
color: white;
background-color: #007bff;
border-radius: 0.5rem;
margin-right: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
}
.probability {
display: block;
margin-top: 10px;
font-size: 16px;
color: #007bff;
}
</style>
"""
    card_template = (
        '\n<div class="quality-container">\n'
        '<span class="ranking">{label}</span>\n'
        '<span class="quality-label">{quality}</span>\n'
        '</div>\n'
    )
    ranking_labels = ['Top 1 Prediction', 'Top 2 Prediction', 'Top 3 Prediction']

    # zip stops at the shorter input, which caps the output at the number
    # of available ranking labels.
    cards = [
        card_template.format(label=label, quality=quality)
        for label, (quality, _prob) in zip(ranking_labels, top_qualities)
    ]
    return styles + "".join(cards)
# Sample issue reports offered as ready-made examples in the UI. Each entry
# is wrapped in its own one-element list, giving a list of single-item rows.
example_texts = [
    [issue]
    for issue in (
        "The algorithm does not accurately distinguish between the positive and negative classes during edge cases.\n\nEnvironment: Production\nReproduction: Run the classifier on the test dataset with known edge cases.",
        "The system must handle at least 10,000 simultaneous users without performance degradation.\n\nEnvironment: Server-side processing\nReproduction: Conduct load testing simulating 10,000 users.",
        "There is a lack of consistency in code reviews, leading to varied quality in commits.\n\nEnvironment: Development team\nReproduction: Review the last month's commit logs and code review histories.",
        "The API documentation is outdated, leading to incorrect usage by developers.\n\nEnvironment: Online documentation portal\nReproduction: Compare the endpoint documentation against the latest API code base.",
        "The current system architecture does not support horizontal scaling, which is necessary for handling increased loads.\n\nEnvironment: System architecture review\nReproduction: Analyse the current deployment and propose necessary changes for scalability.",
        "Users experience data loss when the network connection is unstable during data transmission.\n\nEnvironment: Mobile app, unstable network conditions\nReproduction: Test the data sync feature under various network conditions.",
        "The build fails intermittently on the CI server due to timing issues in test scripts.\n\nEnvironment: CI server Jenkins\nReproduction: Trigger the build process multiple times and note the occurrence of failures.",
        "The regression tests do not cover scenarios involving concurrent user sessions.\n\nEnvironment: Test automation suite\nReproduction: Update the test scripts to include tests for concurrent sessions.",
        "There is frequent miscommunication between the development and QA teams regarding feature specifications.\n\nEnvironment: Inter-team meetings\nReproduction: Audit recent communication logs and meeting notes between the teams.",
        "The service-oriented architecture does not effectively isolate failures, leading to cascading failures across services.\n\nEnvironment: Microservices architecture\nReproduction: Simulate a service failure and observe the impact on other services.",
    )
]
# Build the Gradio UI: one multi-line textbox in, an HTML panel out. The
# second output is a hidden textbox that receives the second element of the
# tuple returned by main_interface.
interface = gr.Interface(
    fn=main_interface,
    inputs=gr.Textbox(lines=7, label="Issue Description", placeholder="Enter your issue text here"),
    outputs=[gr.HTML(label="Prediction Output"), gr.Textbox(label="Predictions", visible=False)],
    title="QualityTagger",
    # Name domains that exist in quality_mapping so the UI text matches
    # what the models can actually predict.
    description="This tool classifies issue text into quality domains such as Test, Requirement, Design, Defect, etc.",
    examples=example_texts,
)
# Start the web server as soon as the module is executed.
interface.launch(share=True)