# --- Imports ---
import os
import re
import gradio as gr
import openai
from datetime import datetime
from bs4 import BeautifulSoup

# --- API Keys ---

openai_api_key = os.environ.get("OPENAI_API_KEY")
openrouter_key = os.environ.get("OPENROUTER")

if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")
if not openrouter_key:
    raise ValueError("OPENROUTER environment variable is not set.")


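# Two clients: one for the analysis model (OpenAI API) and one for the rater
# model (OpenRouter's OpenAI-compatible endpoint).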
client = openai.OpenAI(api_key=openai_api_key)
openai_rater = openai.OpenAI(api_key=openrouter_key, base_url="https://openrouter.ai/api/v1")

# --- Logger ---
log_filename = "rating_log.txt"
if not os.path.exists(log_filename):
    with open(log_filename, "w", encoding="utf-8") as f:
        f.write("=== Rating Log Initialized ===\n")

# --- Exadata Specs ---
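# Theoretical per-rack maximums (IOPS, throughput in GB/s) used for the gap
# analysis. Treat these figures as approximate and verify them against
# Oracle's published Exadata data sheets.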
exadata_specs = {
    "X7": {"Quarter Rack": {"max_iops": 350000, "max_throughput": 25},
           "Half Rack": {"max_iops": 700000, "max_throughput": 50},
           "Full Rack": {"max_iops": 1400000, "max_throughput": 100}},
    "X8": {"Quarter Rack": {"max_iops": 380000, "max_throughput": 28},
           "Half Rack": {"max_iops": 760000, "max_throughput": 56},
           "Full Rack": {"max_iops": 1520000, "max_throughput": 112}},
    "X9": {"Quarter Rack": {"max_iops": 450000, "max_throughput": 30},
           "Half Rack": {"max_iops": 900000, "max_throughput": 60},
           "Full Rack": {"max_iops": 1800000, "max_throughput": 120}},
    "X10": {"Quarter Rack": {"max_iops": 500000, "max_throughput": 35},
            "Half Rack": {"max_iops": 1000000, "max_throughput": 70},
            "Full Rack": {"max_iops": 2000000, "max_throughput": 140}},
    "X11M": {"Quarter Rack": {"max_iops": 600000, "max_throughput": 40},
             "Half Rack": {"max_iops": 1200000, "max_throughput": 80},
             "Full Rack": {"max_iops": 2400000, "max_throughput": 160}},
}

# --- Preprocessor ---
def clean_awr_content(content):
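    """Strip HTML markup from an AWR report (if present) and drop blank
    lines, returning compact plain text."""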
    if "<html" in content.lower():
        soup = BeautifulSoup(content, "html.parser")
        text = soup.get_text()
    else:
        text = content
    cleaned = "\n".join([line.strip() for line in text.splitlines() if line.strip()])
    return cleaned

# --- AWR Analyzer ---
def analyze_awr(content, performance_test_mode, exadata_model, rack_size):
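    """Clean the AWR text, build a detailed diagnostic prompt, and return the
    analysis model's report. Oversized reports are truncated to roughly 128k
    characters (a character budget, not a token count) to fit the context
    window."""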
    cleaned_content = clean_awr_content(content)
    max_chars = 128000
    if len(cleaned_content) > max_chars:
        cleaned_content = cleaned_content[:max_chars] + "\n\n[TRUNCATED]..."

    # Build the analysis prompt. The triple-quoted body stays flush-left so
    # no extra indentation leaks into the prompt text.
    prompt = f"""
You are an expert Oracle Database performance analyst with deep knowledge of AWR reports, Oracle RAC internals, and Exadata architecture (Smart Scan, Flash Cache, IORM, RDMA, Storage Indexes). 

You must produce highly detailed diagnostic insights based on the AWR report provided below. Use numbers and thresholds whenever possible and explain why each observation matters. Do not simply say "high" or "low"; provide the metric, its value, and context (e.g., why 300 gc buffer busy waits/sec is high for OLTP). Explain the implications in RAC/Exadata environments.

======== AWR REPORT START ========
{cleaned_content}
======== AWR REPORT END ========

Please provide the following sections with as much metric detail and technical context as possible:

- **Performance Summary**
    - Overall DB load, CPU usage, and major wait events.
    - Discuss RAC-specific behaviors such as global cache waits.

- **Detailed Analysis of Bottlenecks and/or Degradation Risks**
    - For each identified bottleneck, provide the metric (e.g. "gc buffer busy: 1500/sec") and explain why it is a problem.
    - Provide RAC-relevant interpretations (e.g. is GC messaging over interconnect too high).
    - Include flash cache and I/O specific risks.

- **Performance Forecast and Predictions**
    - Given the current metrics, predict where the system is heading.
    - Use thresholds to indicate risk (e.g. "Redo size at 500MB/min approaching flash log limit").

- **Specific Recommendations for Monitoring**
    - Suggest exactly which metrics should be tracked and why.
    - Include SQL_IDs, Global Cache metrics, Log IO, CPU.

- **Exadata Statistics Performance Summary**
    - Include IO performance, flash cache hit %, Smart Scan utilization.
    - Mention storage server level metrics (latency, MB/s, read/write balance).
    - Indicate if IO saturation occurred based on latency and throughput.

- **Recommended Next Steps to Bridge Performance Gap**
    - Generate action plans (e.g. SQL tuning, service affinity, adding storage cells, increasing LOG_BUFFER).
    - Clearly separate short term vs long term actions.
"""


    # Add Exadata comparison if performance test mode
    if performance_test_mode and exadata_model and rack_size:
        specs = exadata_specs.get(exadata_model, {}).get(rack_size, {})
        if specs:
            prompt += f"""

This was a PERFORMANCE TEST on Oracle Exadata {exadata_model} {rack_size}.
Theoretical Max:
- Max IOPS: {specs['max_iops']}
- Max Throughput: {specs['max_throughput']} GB/s

Show actual vs theoretical and generate Recommended Next Steps to Bridge Performance Gap.
"""

    # --- Call the analysis model ---
    MODEL = "gpt-4-turbo"

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "You are an expert Oracle Database performance analyst."},
            {"role": "user", "content": prompt}
        ]
    )

    return response.choices[0].message.content.strip()


# --- Rater ---
def rate_answer_rater(question, final_answer):
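    """Ask a small rater model (served via OpenRouter) to score the answer
    from 1 to 5 stars with an explanation."""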
    prompt = f"Rate this answer 1-5 stars with explanation:\n\n{final_answer}"
    response = openai_rater.chat.completions.create(
        model="mistral/ministral-8b",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()

# --- Main Logic ---
def process_awr(awr_text, correctness_threshold, performance_test_mode, exadata_model, rack_size):
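    """Analyze the report, score the answer with the rater, and retry the
    analysis once if the score falls below the threshold. Every attempt is
    appended to the log file."""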
    if not awr_text.strip():
        return "No AWR report provided.", "", ""

    answer = analyze_awr(awr_text, performance_test_mode, exadata_model, rack_size)
    rating_text = rate_answer_rater("AWR Analysis", answer)

    # Pull the first 1-5 digit out of the rater's free-text response.
    stars = 0
    match = re.search(r"\b([1-5])\b", rating_text)
    if match:
        stars = int(match.group(1))

    if stars < correctness_threshold:
        answer_retry = analyze_awr(awr_text, performance_test_mode, exadata_model, rack_size)
        rating_text_retry = rate_answer_rater("AWR Analysis (Retry)", answer_retry)

        with open(log_filename, "a", encoding="utf-8") as log_file:
            log_file.write(f"\n---\n{datetime.now()} RETRY\nOriginal: {answer}\nRating: {rating_text}\nRetry: {answer_retry}\nRetry Rating: {rating_text_retry}\n")

        return answer_retry, rating_text_retry, "✅ Retry Occurred (rating below threshold)"
    else:
        with open(log_filename, "a", encoding="utf-8") as log_file:
            log_file.write(f"\n---\n{datetime.now()} SUCCESS\nAnswer: {answer}\nRating: {rating_text}\n")

        return answer, rating_text, "✅ Accepted on first try"

# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("## 📊 Oracle AWR Analyzer (AI + Rating + Retry + Exadata Gap Analysis)")

    awr_text = gr.Textbox(label="Paste AWR Report (HTML or TXT)", lines=30, placeholder="Paste full AWR here...")
    threshold = gr.Slider(0, 5, value=3, step=1, label="Correctness Threshold (Stars for Retry)")
    performance_test_mode = gr.Checkbox(label="Performance Test Mode")
    exadata_model = gr.Dropdown(choices=["X7", "X8", "X9", "X10", "X11M"], label="Exadata Model", visible=False)
    rack_size = gr.Dropdown(choices=["Quarter Rack", "Half Rack", "Full Rack"], label="Rack Size", visible=False)

    def toggle_visibility(mode):
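        """Show the Exadata dropdowns only when Performance Test Mode is checked."""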
        return gr.update(visible=mode), gr.update(visible=mode)

    performance_test_mode.change(toggle_visibility, inputs=performance_test_mode, outputs=[exadata_model, rack_size])

    analyze_btn = gr.Button("Analyze AWR")
    output = gr.Textbox(label="AWR Analysis Result", lines=15)
    rating = gr.Textbox(label="Rater Rating + Explanation", lines=4)
    retry_status = gr.Textbox(label="Retry Status")

    analyze_btn.click(process_awr, inputs=[awr_text, threshold, performance_test_mode, exadata_model, rack_size], outputs=[output, rating, retry_status])

if __name__ == "__main__":
    demo.launch(debug=True)