# === Imports ===
import os
import oci
import re
import gradio as gr
import openai
from datetime import datetime
from bs4 import BeautifulSoup
# --- API Keys ---
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
raise ValueError("OPENAI_API_KEY environment variable is not set.")
client = openai.OpenAI(api_key=openai_api_key)
openrouter_key = os.environ.get("OPENROUTER")
if not openrouter_key:
    raise ValueError("OPENROUTER environment variable is not set.")
openrouter = openai.OpenAI(api_key=openrouter_key, base_url="https://openrouter.ai/api/v1")
# --- OCI Object Storage Setup ---
oci_config = {
"user": os.environ.get("OCI_USER"),
"tenancy": os.environ.get("OCI_TENANCY"),
"fingerprint": os.environ.get("OCI_FINGERPRINT"),
"region": os.environ.get("OCI_REGION"),
"key_content": os.environ.get("OCI_PRIVATE_KEY")
}
namespace = os.environ.get("OCI_NAMESPACE")
bucket_name = os.environ.get("OCI_BUCKET_NAME")
try:
object_storage = oci.object_storage.ObjectStorageClient(oci_config)
except Exception as e:
    object_storage = None  # sentinel so a failed init surfaces later as an explicit error, not a NameError
    print("Failed to initialize OCI Object Storage client:", e)
"""
# --- OCI Object Storage Setup ---
oci_config = {
"user": os.environ.get("OCI_USER"),
"tenancy": os.environ.get("OCI_TENANCY"),
"fingerprint": os.environ.get("OCI_FINGERPRINT"),
"region": os.environ.get("OCI_REGION"),
"key_content": os.environ.get("OCI_PRIVATE_KEY")
}
namespace = os.environ.get("OCI_NAMESPACE")
bucket_name = os.environ.get("OCI_BUCKET_NAME")
os.environ["OCI_BUCKET_NAME"] = "OracleTANGO"
try:
object_storage = oci.object_storage.ObjectStorageClient(oci_config)
except Exception as e:
print("Failed to initialize OCI Object Storage client:", e)
"""
# --- Exadata Specs ---
# Theoretical per-rack maximums: IOPS and throughput in GB/s.
exadata_specs = {
    "X7": {"Quarter Rack": {"max_iops": 350000, "max_throughput": 25},
           "Half Rack": {"max_iops": 700000, "max_throughput": 50},
           "Full Rack": {"max_iops": 1400000, "max_throughput": 100}},
    "X8": {"Quarter Rack": {"max_iops": 380000, "max_throughput": 28},
           "Half Rack": {"max_iops": 760000, "max_throughput": 56},
           "Full Rack": {"max_iops": 1520000, "max_throughput": 112}},
    "X9": {"Quarter Rack": {"max_iops": 450000, "max_throughput": 30},
           "Half Rack": {"max_iops": 900000, "max_throughput": 60},
           "Full Rack": {"max_iops": 1800000, "max_throughput": 120}},
    "X10": {"Quarter Rack": {"max_iops": 500000, "max_throughput": 35},
            "Half Rack": {"max_iops": 1000000, "max_throughput": 70},
            "Full Rack": {"max_iops": 2000000, "max_throughput": 140}},
    "X11M": {"Quarter Rack": {"max_iops": 600000, "max_throughput": 40},
             "Half Rack": {"max_iops": 1200000, "max_throughput": 80},
             "Full Rack": {"max_iops": 2400000, "max_throughput": 160}},
}
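# Convenience lookup (a sketch; the helper name is this edit's assumption, not
# part of the original code). It mirrors the .get(...).get(...) pattern used in
# CriticalAnalyzerAgent below and returns {} for unknown model/rack combinations.
def get_exadata_spec(model, rack):
    return exadata_specs.get(model, {}).get(rack, {})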
# --- Supported LLM Models ---
supported_llms = {
"gpt-3.5-turbo": "Fastest / Lowest Cost - General AWR Healthcheck",
"gpt-4-turbo": "Balanced - Production Performance Analysis",
"gpt-4o": "Deepest Analysis - Exadata, RAC, Smart Scan, Critical Issues",
"gpt-4.1": "Great for quick coding and analysis",
}
# --- Utils ---
def clean_awr_content(content):
if "<html" in content.lower():
soup = BeautifulSoup(content, "html.parser")
return soup.get_text()
return content
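# Illustrative example (not executed): for an HTML report,
# clean_awr_content("<html><body><h1>DB Time</h1>42 min</body></html>")
# returns just the visible text ("DB Time42 min"); plain-text reports pass
# through unchanged.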
def upload_awr_file(file_obj):
    # Gradio may pass a tempfile path (str) or a file-like object with .name.
    path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
    filename = os.path.basename(path)
    with open(path, "rb") as f:
        content = f.read()
    object_storage.put_object(namespace, bucket_name, filename, content)
    return f"\u2705 Uploaded {filename}"
def list_awr_files():
try:
objects = object_storage.list_objects(namespace, bucket_name)
        return [obj.name for obj in objects.data.objects if obj.name.endswith((".html", ".txt"))]
except Exception as e:
return [f"Error listing objects: {str(e)}"]
def get_awr_file_text(filename):
    try:
        response = object_storage.get_object(namespace, bucket_name, filename)
        raw = response.data.content
        try:
            text = raw.decode()
        except UnicodeDecodeError:
            text = raw.decode("latin-1")  # some Oracle HTML reports are not UTF-8
        return clean_awr_content(text)
    except Exception as e:
        return f"Error loading file: {str(e)}"
def compare_awrs(file_list, llm_model):
if not file_list:
return "No files selected."
combined_text = ""
for fname in file_list:
content = get_awr_file_text(fname)
combined_text += f"\n=== AWR: {fname} ===\n{content[:3000]}...\n"
prompt = f"""You are a senior Oracle performance engineer. You will compare multiple AWR reports and highlight:
- Key differences in workload or system behavior
- Major trends or anomalies
- Which report shows better performance and why
- Exadata-specific metrics like Smart Scan, Flash I/O
- Suggestions to unify or improve system behavior
AWR Reports:
{combined_text}
"""
response = client.chat.completions.create(
model=llm_model,
messages=[
{"role": "system", "content": "You are a comparative AWR analysis expert."},
{"role": "user", "content": prompt}
]
)
return response.choices[0].message.content.strip()
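# Illustrative usage (file names are placeholders, not real objects in the bucket):
# summary = compare_awrs(["awr_node1.html", "awr_node2.html"], "gpt-4o")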
# === AGENTS ===
class CriticalAnalyzerAgent:
def analyze(self, content, performance_test_mode, exadata_model, rack_size, llm_model):
cleaned_content = clean_awr_content(content)
        # Keep the prompt within a rough size budget (characters, not tokens).
        if len(cleaned_content) > 128000:
            cleaned_content = cleaned_content[:128000] + "\n\n[TRUNCATED]..."
prompt = f"""You are an expert Oracle DBA performance analyst specialized in AWR + Exadata.
Please perform advanced analysis on the following report:
======== AWR REPORT START ========
{cleaned_content}
======== AWR REPORT END ========
Required Output:
- Performance Summary (with metric values)
- Detailed Bottlenecks + Risks (quantified)
- Forecast + Predictions
- Monitoring Recommendations
- Exadata Statistics (IO, Flash Cache, Smart Scan)
- Recommended Next Steps to Bridge Gaps
"""
if performance_test_mode and exadata_model and rack_size:
specs = exadata_specs.get(exadata_model, {}).get(rack_size, {})
if specs:
prompt += f"""
This was a PERFORMANCE TEST on Oracle Exadata {exadata_model} {rack_size}.
Theoretical Max:
- IOPS: {specs['max_iops']}
- Throughput: {specs['max_throughput']} GB/s
Compare observed vs theoretical. Recommend actions to close the performance gap.
"""
response = client.chat.completions.create(
model=llm_model,
messages=[
{"role": "system", "content": "You are an expert Oracle DBA."},
{"role": "user", "content": prompt}
]
)
return response.choices[0].message.content.strip()
class HealthAgent:
def check_health(self, content, llm_model):
cleaned_content = clean_awr_content(content)
        # Same character-count guard as in CriticalAnalyzerAgent.
        if len(cleaned_content) > 128000:
            cleaned_content = cleaned_content[:128000] + "\n\n[TRUNCATED]..."
prompt = f"""You are the Oracle AWR Health Analysis Agent.
Your primary responsibility is to detect and report ANY and ALL database health risks, alerts, warnings, or failures in the AWR report.
You MUST:
- Identify all issues marked as CRITICAL, WARNING, ALERT, FAILED, OFFLINE, CONFINED, DROPPED, or ERROR.
- Never omit or generalize. If something appears important, call it out.
- Classify each issue into: 🚨 CRITICAL / ⚠️ WARNING / ✅ INFO
- For CRITICAL and WARNING, provide suggested actions or considerations.
- Always confirm at the end if no CRITICAL or WARNING issues were found.
Special Attention Areas:
- Flash Cache or Flash Disk Failures
- I/O Subsystem stalls or errors
- ASM/Grid Disk issues
- Smart Scan failures
- Redo Log issues
- RAC Interconnect issues
AWR CONTENT:
{cleaned_content}
"""
response = client.chat.completions.create(
model=llm_model,
messages=[
{"role": "system", "content": "You are the strict Oracle AWR Health Analysis Agent."},
{"role": "user", "content": prompt}
]
)
return response.choices[0].message.content.strip()
class RaterAgent:
def rate(self, content):
prompt = f"Rate the following analysis from 1-5 stars and explain:\n\n{content}"
response = openrouter.chat.completions.create(
model="mistralai/Mixtral-8x7B-Instruct",
messages=[{"role": "user", "content": prompt}]
)
return response.choices[0].message.content.strip()
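# A stricter alternative (a sketch, not part of the original logic) to the bare
# \d+ search used in process_awr below: it accepts only a standalone 1-5, so a
# stray "10" or a year in the rating text is never mistaken for the star count.
def parse_stars(rating_text):
    match = re.search(r"\b([1-5])\b", rating_text)
    return int(match.group(1)) if match else 0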
# === MAIN AWR PROCESS ===
def process_awr(awr_text, threshold, performance_test_mode, exadata_model, rack_size, llm_model):
analyzer = CriticalAnalyzerAgent()
health = HealthAgent()
rater = RaterAgent()
if not awr_text.strip():
return "No AWR text provided", "", "", ""
analysis = analyzer.analyze(awr_text, performance_test_mode, exadata_model, rack_size, llm_model)
health_status = health.check_health(awr_text, llm_model)
rating_text = rater.rate(analysis)
stars = 0
match = re.search(r"(\d+)", rating_text)
if match:
stars = int(match.group(1))
    retry_status = "✅ Accepted"
if stars < threshold:
analysis = analyzer.analyze(awr_text, performance_test_mode, exadata_model, rack_size, llm_model)
rating_text = rater.rate(analysis)
        retry_status = "❌ Retry Occurred"
return analysis, health_status, rating_text, retry_status
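# Illustrative call (kept as a comment so the script's behavior is unchanged;
# argument values are placeholders):
# analysis, health_status, rating, status = process_awr(
#     awr_text, threshold=3, performance_test_mode=True,
#     exadata_model="X10", rack_size="Half Rack", llm_model="gpt-4o")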
# === Gradio UI ===
with gr.Blocks() as demo:
with gr.Tab("Manual AWR Analysis"):
gr.Markdown("# π§ Multi-Agent Oracle AWR Analyzer (Production Edition)")
# NEW: File upload for AWR
awr_file = gr.File(label="Upload AWR Report (.html or .txt)", file_types=[".html", ".txt"])
awr_text = gr.Textbox(label="AWR Report (pasted or loaded)", lines=30)
        def awr_file_to_text(file_obj):
            if not file_obj:
                return ""
            # Gradio may hand us an open file-like object or a tempfile path.
            if hasattr(file_obj, "read"):
                content = file_obj.read()
            else:
                path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
                with open(path, "rb") as f:
                    content = f.read()
            # Some Gradio versions already return str rather than bytes.
            if isinstance(content, str):
                return clean_awr_content(content)
            # Oracle HTML reports are sometimes latin-1 rather than UTF-8.
            try:
                text = content.decode()
            except UnicodeDecodeError:
                text = content.decode("latin-1")
            return clean_awr_content(text)
awr_file.upload(awr_file_to_text, inputs=awr_file, outputs=awr_text)
# Everything below is the same as before
threshold = gr.Slider(0, 5, value=3, step=1, label="Correctness Threshold (Stars)")
performance_test_mode = gr.Checkbox(label="Performance Test Mode")
exadata_model = gr.Dropdown(choices=list(exadata_specs.keys()), label="Exadata Model", visible=False)
rack_size = gr.Dropdown(choices=["Quarter Rack", "Half Rack", "Full Rack"], label="Rack Size", visible=False)
llm_selector = gr.Dropdown(choices=list(supported_llms.keys()), value="gpt-4.1", label="LLM Model")
def toggle_visibility(mode):
return gr.update(visible=mode), gr.update(visible=mode)
performance_test_mode.change(toggle_visibility, inputs=performance_test_mode, outputs=[exadata_model, rack_size])
analyze_btn = gr.Button("Analyze AWR Report")
output = gr.Textbox(label="AWR Analysis", lines=20)
health = gr.Textbox(label="Health Agent Findings", lines=10)
rating = gr.Textbox(label="Rater", lines=3)
retry_status = gr.Textbox(label="Retry Status")
analyze_btn.click(
process_awr,
inputs=[awr_text, threshold, performance_test_mode, exadata_model, rack_size, llm_selector],
outputs=[output, health, rating, retry_status]
)
with gr.Tab("Compare AWRs from OCI"):
upload_file = gr.File(label="Upload AWR Report", file_types=[".html", ".txt"])
upload_status = gr.Textbox(label="Upload Status")
upload_file.upload(fn=upload_awr_file, inputs=upload_file, outputs=upload_status)
        refresh_button = gr.Button("🔄 Refresh File List")
file_multiselect = gr.Dropdown(choices=[], label="Select AWR Files", multiselect=True)
refresh_button.click(fn=lambda: gr.update(choices=list_awr_files()), outputs=file_multiselect)
llm_compare = gr.Dropdown(choices=list(supported_llms.keys()), value="gpt-4.1", label="LLM Model for Comparison")
compare_output = gr.Textbox(label="Comparison Output", lines=20)
gr.Button("Compare Selected AWRs").click(
fn=compare_awrs,
inputs=[file_multiselect, llm_compare],
outputs=compare_output
)
if __name__ == "__main__":
demo.launch(debug=True)