Spaces:

kgauvin603
/

Oracle-TANGO

Sleeping

File size: 7,050 Bytes

# === Imports ===
import os
import re
import gradio as gr
import openai
from datetime import datetime
from bs4 import BeautifulSoup

# --- API Keys ---
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")

client = openai.OpenAI(api_key=openai_api_key)

# --- Exadata Specs ---
exadata_specs = {
    "X7": {"Quarter Rack": {"max_iops": 350000, "max_throughput": 25}, "Half Rack": {"max_iops": 700000, "max_throughput": 50}, "Full Rack": {"max_iops": 1400000, "max_throughput": 100}},
    "X8": {"Quarter Rack": {"max_iops": 380000, "max_throughput": 28}, "Half Rack": {"max_iops": 760000, "max_throughput": 56}, "Full Rack": {"max_iops": 1520000, "max_throughput": 112}},
    "X9": {"Quarter Rack": {"max_iops": 450000, "max_throughput": 30}, "Half Rack": {"max_iops": 900000, "max_throughput": 60}, "Full Rack": {"max_iops": 1800000, "max_throughput": 120}},
    "X10": {"Quarter Rack": {"max_iops": 500000, "max_throughput": 35}, "Half Rack": {"max_iops": 1000000, "max_throughput": 70}, "Full Rack": {"max_iops": 2000000, "max_throughput": 140}},
    "X11M": {"Quarter Rack": {"max_iops": 600000, "max_throughput": 40}, "Half Rack": {"max_iops": 1200000, "max_throughput": 80}, "Full Rack": {"max_iops": 2400000, "max_throughput": 160}},
}

# --- Utils ---
def clean_awr_content(content):
    if "<html" in content.lower():
        soup = BeautifulSoup(content, "html.parser")
        return soup.get_text()
    return content

# === AGENTS ===

class CriticalAnalyzerAgent:
    def analyze(self, content, performance_test_mode, exadata_model, rack_size):
        cleaned_content = clean_awr_content(content)
        if len(cleaned_content) > 128000:
            cleaned_content = cleaned_content[:128000] + "\n\n[TRUNCATED]..."

        prompt = f"""
You are an expert Oracle DBA performance analyst specialized in AWR + Exadata.

Please perform advanced analysis on the following report:

======== AWR REPORT START ========
{cleaned_content}
======== AWR REPORT END ========

Required Output:
- Performance Summary (with metric values)
- Detailed Bottlenecks + Risks (quantified)
- Forecast + Predictions
- Monitoring Recommendations
- Exadata Statistics (IO, Flash Cache, Smart Scan)
- Recommended Next Steps to Bridge Gaps
"""

        if performance_test_mode and exadata_model and rack_size:
            specs = exadata_specs.get(exadata_model, {}).get(rack_size, {})
            if specs:
                prompt += f"""

This was a PERFORMANCE TEST on Oracle Exadata {exadata_model} {rack_size}.
Theoretical Max:
- IOPS: {specs['max_iops']}
- Throughput: {specs['max_throughput']} GB/s

Compare observed vs theoretical. Recommend actions to close the performance gap.
"""

        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": "You are an expert Oracle DBA."},
                {"role": "user", "content": prompt}
            ]
        )

        return response.choices[0].message.content.strip()

class HealthAgent:
    def check_health(self, content):
        cleaned_content = clean_awr_content(content)
        if len(cleaned_content) > 128000:
            cleaned_content = cleaned_content[:128000] + "\n\n[TRUNCATED]..."

        prompt = f"""
You are the Oracle AWR Health Analysis Agent.

Your primary responsibility is to detect and report any possible database health risks or failures from the AWR report.

You MUST:
- Identify all mentions of warnings, failures, critical issues, and alerts.
- Do not omit any mention of failure or warnings, even minor ones.
- Clearly classify them as CRITICAL, WARNING, or INFO.
- Provide suggested actions for CRITICAL and WARNING level issues.
- Always confirm at the end if no CRITICAL or WARNING issues were found.

You are NOT allowed to generalize or omit any issue, no matter how small.

If Flash Cache, I/O, Smart Scan, ASM, Redo log, or RAC interconnect issues are mentioned, they MUST be called out explicitly.

AWR CONTENT:
{cleaned_content}
"""

        response = client.chat.completions.create(
            model="gpt-4-turbo",  # or "gpt-4o" if preferred/available
            messages=[
                {"role": "system", "content": "You are the strict Oracle AWR Health Analysis Agent."},
                {"role": "user", "content": prompt}
            ]
        )

        return response.choices[0].message.content.strip()

class RaterAgent:
    def rate(self, content):
        prompt = f"Rate the following analysis from 1-5 stars and explain:\n\n{content}"
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content.strip()

# === Main Process ===
def process_awr(awr_text, threshold, performance_test_mode, exadata_model, rack_size):
    analyzer = CriticalAnalyzerAgent()
    health = HealthAgent()
    rater = RaterAgent()

    if not awr_text.strip():
        return "No AWR text provided", "", ""

    analysis = analyzer.analyze(awr_text, performance_test_mode, exadata_model, rack_size)
    health_status = health.check_health(awr_text)
    rating_text = rater.rate(analysis)

    stars = 0
    match = re.search(r"(\d+)", rating_text)
    if match:
        stars = int(match.group(1))

    retry_status = "✅ Accepted"

    if stars < threshold:
        analysis_retry = analyzer.analyze(awr_text, performance_test_mode, exadata_model, rack_size)
        rating_text_retry = rater.rate(analysis_retry)
        retry_status = "✅ Retry Occurred"
        analysis = analysis_retry
        rating_text = rating_text_retry

    return analysis, health_status, rating_text, retry_status

# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Multi-Agent Oracle AWR Analyzer (Production Edition)")

    awr_text = gr.Textbox(label="Paste AWR Report", lines=30)
    threshold = gr.Slider(0, 5, value=3, step=1, label="Correctness Threshold (Stars)")
    performance_test_mode = gr.Checkbox(label="Performance Test Mode")
    exadata_model = gr.Dropdown(choices=list(exadata_specs.keys()), label="Exadata Model", visible=False)
    rack_size = gr.Dropdown(choices=["Quarter Rack", "Half Rack", "Full Rack"], label="Rack Size", visible=False)

    def toggle_visibility(mode):
        return gr.update(visible=mode), gr.update(visible=mode)

    performance_test_mode.change(toggle_visibility, inputs=performance_test_mode, outputs=[exadata_model, rack_size])

    analyze_btn = gr.Button("Analyze AWR Report")
    output = gr.Textbox(label="AWR Analysis", lines=20)
    health = gr.Textbox(label="Health Agent Findings", lines=5)
    rating = gr.Textbox(label="Rater", lines=3)
    retry_status = gr.Textbox(label="Retry Status")

    analyze_btn.click(process_awr, inputs=[awr_text, threshold, performance_test_mode, exadata_model, rack_size], outputs=[output, health, rating, retry_status])

demo.launch(debug=True)