# Spaces: Sleeping
# (The line above appears to be page-status text captured along with the file; it is not program code.)
# === Imports ===
import os
import re
from datetime import datetime

import gradio as gr
import openai
from bs4 import BeautifulSoup
# --- API Keys ---
# Both API clients are created once, at import time, from environment variables.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")
client = openai.OpenAI(api_key=openai_api_key)

# Fail fast when the OpenRouter key is missing. The OpenAI SDK substitutes
# OPENAI_API_KEY when api_key is None, which would send the OpenAI secret to
# the OpenRouter endpoint instead of reporting a configuration error.
openrouter_key = os.environ.get("OPENROUTER")
if not openrouter_key:
    raise ValueError("OPENROUTER environment variable is not set.")
openrouter = openai.OpenAI(api_key=openrouter_key, base_url="https://openrouter.ai/api/v1")
# --- Exadata Specs ---
# Theoretical maxima per Exadata model and rack size, keyed as
# exadata_specs[model][rack_size]. "max_throughput" is reported to the LLM
# in GB/s by the performance-test prompt; "max_iops" is a plain count.
exadata_specs = {
    "X7": {
        "Quarter Rack": {"max_iops": 350000, "max_throughput": 25},
        "Half Rack": {"max_iops": 700000, "max_throughput": 50},
        "Full Rack": {"max_iops": 1400000, "max_throughput": 100},
    },
    "X8": {
        "Quarter Rack": {"max_iops": 380000, "max_throughput": 28},
        "Half Rack": {"max_iops": 760000, "max_throughput": 56},
        "Full Rack": {"max_iops": 1520000, "max_throughput": 112},
    },
    "X9": {
        "Quarter Rack": {"max_iops": 450000, "max_throughput": 30},
        "Half Rack": {"max_iops": 900000, "max_throughput": 60},
        "Full Rack": {"max_iops": 1800000, "max_throughput": 120},
    },
    "X10": {
        "Quarter Rack": {"max_iops": 500000, "max_throughput": 35},
        "Half Rack": {"max_iops": 1000000, "max_throughput": 70},
        "Full Rack": {"max_iops": 2000000, "max_throughput": 140},
    },
    "X11M": {
        "Quarter Rack": {"max_iops": 600000, "max_throughput": 40},
        "Half Rack": {"max_iops": 1200000, "max_throughput": 80},
        "Full Rack": {"max_iops": 2400000, "max_throughput": 160},
    },
}
# --- Supported LLM Models ---
# Maps each selectable model id to a short description of its intended use.
# The keys populate the "LLM Model" dropdown in the UI.
supported_llms = {
    "gpt-3.5-turbo": "Fastest / Lowest Cost - General AWR Healthcheck",
    "gpt-4-turbo": "Balanced - Production Performance Analysis",
    "gpt-4o": "Deepest Analysis - Exadata, RAC, Smart Scan, Critical Issues",
}
# --- Utils ---
def clean_awr_content(content):
    """Return the AWR report as plain text, stripping HTML markup if present.

    Args:
        content: Raw report text; may be HTML or already plain text.

    Returns:
        The text of the report. Non-HTML input is returned unchanged; empty
        or ``None`` input yields an empty string.
    """
    if not content:
        return ""
    if "<html" in content.lower():
        soup = BeautifulSoup(content, "html.parser")
        # Join text nodes with a space: without a separator, adjacent table
        # cells are fused together (e.g. "<td>1</td><td>2</td>" -> "12"),
        # which corrupts the metric values in the report's tables.
        return soup.get_text(separator=" ")
    return content
# === AGENTS ===
class CriticalAnalyzerAgent:
    """Agent that asks the selected OpenAI model for a performance analysis of an AWR report."""

    # Maximum number of report characters embedded in the prompt.
    # NOTE(review): this is a character cap, not a token cap; it may still
    # exceed the context window of smaller models — confirm per model.
    MAX_CONTENT_CHARS = 128000
    # Appended to the report text whenever it had to be cut.
    TRUNCATION_MARKER = "\n\n[TRUNCATED]..."

    def build_prompt(self, cleaned_content, performance_test_mode=False, exadata_model=None, rack_size=None):
        """Build the user prompt for the analysis request.

        Args:
            cleaned_content: Plain-text report; cut to ``MAX_CONTENT_CHARS``
                characters (plus a marker) when longer.
            performance_test_mode: When truthy, and a known model/rack pair is
                given, a section with the theoretical maxima is appended.
            exadata_model: Key into ``exadata_specs`` (e.g. ``"X9"``).
            rack_size: Rack key under that model (e.g. ``"Half Rack"``).

        Returns:
            The prompt string.
        """
        if len(cleaned_content) > self.MAX_CONTENT_CHARS:
            cleaned_content = cleaned_content[: self.MAX_CONTENT_CHARS] + self.TRUNCATION_MARKER
        prompt = f"""
You are an expert Oracle DBA performance analyst specialized in AWR + Exadata.
Please perform advanced analysis on the following report:
======== AWR REPORT START ========
{cleaned_content}
======== AWR REPORT END ========
Required Output:
- Performance Summary (with metric values)
- Detailed Bottlenecks + Risks (quantified)
- Forecast + Predictions
- Monitoring Recommendations
- Exadata Statistics (IO, Flash Cache, Smart Scan)
- Recommended Next Steps to Bridge Gaps
"""
        if performance_test_mode and exadata_model and rack_size:
            # Unknown model/rack combinations resolve to {} and add nothing.
            specs = exadata_specs.get(exadata_model, {}).get(rack_size, {})
            if specs:
                prompt += f"""
This was a PERFORMANCE TEST on Oracle Exadata {exadata_model} {rack_size}.
Theoretical Max:
- IOPS: {specs['max_iops']}
- Throughput: {specs['max_throughput']} GB/s
Compare observed vs theoretical. Recommend actions to close the performance gap.
"""
        return prompt

    def analyze(self, content, performance_test_mode, exadata_model, rack_size, llm_model):
        """Clean the report, send it to ``llm_model`` and return the analysis text.

        Args:
            content: Raw AWR report (HTML or plain text).
            performance_test_mode: Whether to add the Exadata comparison section.
            exadata_model: Selected Exadata model, or a falsy value.
            rack_size: Selected rack size, or a falsy value.
            llm_model: Model id passed to the chat-completions call.

        Returns:
            The model's reply with surrounding whitespace removed; an empty
            string if the reply carried no text content.
        """
        cleaned_content = clean_awr_content(content)
        prompt = self.build_prompt(cleaned_content, performance_test_mode, exadata_model, rack_size)
        response = client.chat.completions.create(
            model=llm_model,
            messages=[
                {"role": "system", "content": "You are an expert Oracle DBA."},
                {"role": "user", "content": prompt},
            ],
        )
        # message.content can be None; avoid AttributeError on .strip().
        return (response.choices[0].message.content or "").strip()
class HealthAgent:
    """Agent that asks the selected OpenAI model to list health risks found in an AWR report."""

    # Maximum number of report characters embedded in the prompt.
    # NOTE(review): this is a character cap, not a token cap; it may still
    # exceed the context window of smaller models — confirm per model.
    MAX_CONTENT_CHARS = 128000
    # Appended to the report text whenever it had to be cut.
    TRUNCATION_MARKER = "\n\n[TRUNCATED]..."

    def build_prompt(self, cleaned_content):
        """Build the user prompt for the health check.

        Args:
            cleaned_content: Plain-text report; cut to ``MAX_CONTENT_CHARS``
                characters (plus a marker) when longer.

        Returns:
            The prompt string.
        """
        if len(cleaned_content) > self.MAX_CONTENT_CHARS:
            cleaned_content = cleaned_content[: self.MAX_CONTENT_CHARS] + self.TRUNCATION_MARKER
        return f"""
You are the Oracle AWR Health Analysis Agent.
Your primary responsibility is to detect and report ANY and ALL database health risks, alerts, warnings, or failures in the AWR report.
You MUST:
- Identify all issues marked as CRITICAL, WARNING, ALERT, FAILED, OFFLINE, CONFINED, DROPPED, or ERROR.
- Never omit or generalize. If something appears important, call it out.
- Classify each issue into: 🚨 CRITICAL / ⚠️ WARNING / ✅ INFO
- For CRITICAL and WARNING, provide suggested actions or considerations.
- Always confirm at the end if no CRITICAL or WARNING issues were found.
Special Attention Areas:
- Flash Cache or Flash Disk Failures
- I/O Subsystem stalls or errors
- ASM/Grid Disk issues
- Smart Scan failures
- Redo Log issues
- RAC Interconnect issues
AWR CONTENT:
{cleaned_content}
"""

    def check_health(self, content, llm_model):
        """Clean the report, send it to ``llm_model`` and return the health findings.

        Args:
            content: Raw AWR report (HTML or plain text).
            llm_model: Model id passed to the chat-completions call.

        Returns:
            The model's reply with surrounding whitespace removed; an empty
            string if the reply carried no text content.
        """
        prompt = self.build_prompt(clean_awr_content(content))
        response = client.chat.completions.create(
            model=llm_model,
            messages=[
                {"role": "system", "content": "You are the strict Oracle AWR Health Analysis Agent."},
                {"role": "user", "content": prompt},
            ],
        )
        # message.content can be None; avoid AttributeError on .strip().
        return (response.choices[0].message.content or "").strip()
class RaterAgent:
    """Agent that asks a second model, via the OpenRouter client, to rate an analysis."""

    # Model id sent to the OpenRouter endpoint.
    MODEL = "mistralai/Mixtral-8x7B-Instruct"

    def build_prompt(self, content):
        """Return the rating request for the given analysis text."""
        return f"Rate the following analysis from 1-5 stars and explain:\n\n{content}"

    def rate(self, content):
        """Request a 1-5 star rating (with explanation) for ``content``.

        Args:
            content: The analysis text to be rated.

        Returns:
            The rater's free-text reply with surrounding whitespace removed;
            an empty string if the reply carried no text content.
        """
        response = openrouter.chat.completions.create(
            model=self.MODEL,
            messages=[{"role": "user", "content": self.build_prompt(content)}],
        )
        # message.content can be None; avoid AttributeError on .strip().
        return (response.choices[0].message.content or "").strip()
# === Main Process ===
# A number immediately followed by a rating cue ("/5", "out of 5", "star(s)").
# The lookbehind rejects the tail of a range or decimal, so an echoed
# "1-5 stars" is not read as a rating of 5.
_CUED_RATING = re.compile(
    r"(?<![\d.\-])(\d+(?:\.\d+)?)\s*(?:/\s*5\b|out\s+of\s+5\b|stars?\b)",
    re.IGNORECASE,
)
_ANY_INTEGER = re.compile(r"\d+")


def _extract_star_rating(rating_text):
    """Return the star rating found in the rater's reply, or 0 if none is found."""
    cued = _CUED_RATING.search(rating_text)
    if cued:
        return float(cued.group(1))
    # Fall back to the first run of digits anywhere in the reply.
    bare = _ANY_INTEGER.search(rating_text)
    return int(bare.group()) if bare else 0


def process_awr(awr_text, threshold, performance_test_mode, exadata_model, rack_size, llm_model):
    """Run the analysis, health check and rating pipeline for one AWR report.

    The analysis is rated; when the rating falls below ``threshold`` the
    analysis is generated and rated once more, and that second result is
    returned without checking its rating against the threshold again.

    Args:
        awr_text: Raw AWR report pasted by the user.
        threshold: Minimum acceptable star rating.
        performance_test_mode: Whether to compare against Exadata maxima.
        exadata_model: Selected Exadata model, or a falsy value.
        rack_size: Selected rack size, or a falsy value.
        llm_model: Model id used for the analysis and health check.

    Returns:
        A 4-tuple of strings: (analysis, health findings, rating text,
        retry status). For empty input the first item is an error message
        and the rest are empty.
    """
    # Validate before creating agents or making any API call.
    if not awr_text or not awr_text.strip():
        return "No AWR text provided", "", "", ""

    analyzer = CriticalAnalyzerAgent()
    health = HealthAgent()
    rater = RaterAgent()

    analysis = analyzer.analyze(awr_text, performance_test_mode, exadata_model, rack_size, llm_model)
    health_status = health.check_health(awr_text, llm_model)
    rating_text = rater.rate(analysis)

    retry_status = "✅ Accepted"
    if _extract_star_rating(rating_text) < threshold:
        analysis = analyzer.analyze(awr_text, performance_test_mode, exadata_model, rack_size, llm_model)
        rating_text = rater.rate(analysis)
        retry_status = "✅ Retry Occurred"
    return analysis, health_status, rating_text, retry_status
# === Gradio UI ===
# Layout: report input and options, then an action button, then four outputs
# that receive the tuple returned by process_awr.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Multi-Agent Oracle AWR Analyzer (Production Edition)")
    awr_text = gr.Textbox(label="Paste AWR Report", lines=30)
    threshold = gr.Slider(0, 5, value=3, step=1, label="Correctness Threshold (Stars)")
    performance_test_mode = gr.Checkbox(label="Performance Test Mode")
    # The two Exadata selectors start hidden and are shown with the checkbox.
    exadata_model = gr.Dropdown(choices=list(exadata_specs.keys()), label="Exadata Model", visible=False)
    rack_size = gr.Dropdown(choices=["Quarter Rack", "Half Rack", "Full Rack"], label="Rack Size", visible=False)
    llm_selector = gr.Dropdown(choices=list(supported_llms.keys()), value="gpt-4-turbo", label="LLM Model")

    def toggle_visibility(mode):
        """Show or hide both Exadata dropdowns according to the checkbox state."""
        return gr.update(visible=mode), gr.update(visible=mode)

    performance_test_mode.change(toggle_visibility, inputs=performance_test_mode, outputs=[exadata_model, rack_size])

    analyze_btn = gr.Button("Analyze AWR Report")
    output = gr.Textbox(label="AWR Analysis", lines=20)
    health = gr.Textbox(label="Health Agent Findings", lines=10)
    rating = gr.Textbox(label="Rater", lines=3)
    retry_status = gr.Textbox(label="Retry Status")

    # Input order must match process_awr's parameter order; output order
    # must match the order of its returned tuple.
    analyze_btn.click(
        process_awr,
        inputs=[awr_text, threshold, performance_test_mode, exadata_model, rack_size, llm_selector],
        outputs=[output, health, rating, retry_status],
    )

demo.launch(debug=True)