# Spaces: meet12341234 / LogLens (Sleeping)
# File size: 11,477 Bytes

import gradio as gr
import json
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TFAutoModelForSeq2SeqLM

# --- Model Loading ---
# Summarization model (BART)
def load_summarizer():
    """Build the summarization pipeline used for the first pipeline step.

    Loads the ``VidhuMathur/bart-log-summarization`` checkpoint through
    ``TFAutoModelForSeq2SeqLM`` together with its tokenizer and wraps both
    in a ``"summarization"`` pipeline.

    Returns:
        The configured pipeline object.
    """
    checkpoint = "VidhuMathur/bart-log-summarization"
    seq2seq_model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Device index 0 when torch reports CUDA, -1 otherwise.
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1

    return pipeline(
        "summarization",
        model=seq2seq_model,
        tokenizer=seq2seq_tokenizer,
        device=device_index,
    )

# Causal LM for analysis (Qwen)
def load_qwen():
    """Load the ``Qwen/Qwen3-0.6B`` causal LM and its tokenizer.

    The weights are loaded as float16 and moved to ``"cuda"`` when torch
    reports CUDA; otherwise float32 on ``"cpu"``. If the tokenizer has no pad
    token, its EOS token is reused as the pad token.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = "Qwen/Qwen3-0.6B"
    qwen_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    use_cuda = torch.cuda.is_available()
    weight_dtype = torch.float16 if use_cuda else torch.float32
    qwen_model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        torch_dtype=weight_dtype,
    )
    qwen_model = qwen_model.to("cuda" if use_cuda else "cpu")

    if qwen_tokenizer.pad_token is None:
        qwen_tokenizer.pad_token = qwen_tokenizer.eos_token
    return qwen_model, qwen_tokenizer

# --- Core Pipeline Functions ---
def extract_json_simple(text):
    """Return the first balanced ``{...}`` span found in *text*.

    Scanning starts at the first ``{`` and stops at the ``}`` that closes it.
    Braces that appear inside double-quoted JSON strings (including strings
    containing escaped quotes) are ignored, so a value such as ``"echo }"``
    does not end the object early.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The substring from the first ``{`` to its matching ``}``, or ``None``
        when there is no ``{`` or the object is never closed.
    """
    start = text.find('{')
    if start == -1:
        return None

    depth = 0
    in_string = False
    escaped = False
    for i, char in enumerate(text[start:], start):
        if in_string:
            if escaped:
                # Character after a backslash is literal, whatever it is.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return text[start:i + 1]
    # Ran off the end with the object (or a string) still open.
    return None

def ensure_required_keys(analysis, summary):
    """Fill in and normalise the four fields expected in an analysis dict.

    *analysis* is updated in place and also returned.

    * ``root_cause`` is replaced by a default built from the first 100
      characters of *summary* when it is missing, falsy, or a blank string.
    * ``debugging_steps``, ``debug_commands`` and ``useful_links`` are
      coerced to lists of non-blank strings: a bare string becomes a one-item
      list, list items are converted with ``str``, ``None``/blank items are
      dropped, and any other value type is discarded. If nothing remains, the
      built-in default list is used.

    Args:
        analysis: Dict parsed from the model's JSON output.
        summary: Log summary text used to build the default root cause.

    Returns:
        The same *analysis* dict, with all four keys present.
    """
    required_keys = {
        "root_cause": f"Issue identified from log analysis: {summary[:100]}...",
        "debugging_steps": [
            "Check system logs for error patterns",
            "Verify service status and configuration",
            "Test connectivity and permissions"
        ],
        "debug_commands": [
            "systemctl status service-name",
            "journalctl -u service-name -n 50",
            "netstat -tlnp | grep port"
        ],
        "useful_links": [
            "https://docs.system-docs.com/troubleshooting",
            "https://stackoverflow.com/questions/tagged/debugging"
        ]
    }
    for key, default_value in required_keys.items():
        value = analysis.get(key)
        if isinstance(default_value, list):
            # Consumers join these fields with '\n'; a raw string would be
            # split per character and non-string items would raise.
            if isinstance(value, str):
                value = [value]
            elif not isinstance(value, (list, tuple)):
                value = []
            cleaned = [
                str(item)
                for item in value
                if item is not None and str(item).strip()
            ]
            analysis[key] = cleaned or default_value
        elif not value or (isinstance(value, str) and not value.strip()):
            analysis[key] = default_value
    return analysis

def create_fallback_analysis(summary):
    """Pick a canned analysis for *summary* by keyword lookup.

    The summary is lower-cased and checked, in order, against three keyword
    groups (database, memory, disk) using plain substring containment. The
    first group with a hit supplies the result; if none match, a generic
    analysis quoting the first 100 characters of *summary* is returned.

    Args:
        summary: Log summary text.

    Returns:
        A new dict with ``root_cause``, ``debugging_steps``,
        ``debug_commands`` and ``useful_links``.
    """
    lowered = summary.lower()

    # (keywords, canned analysis) pairs, tried top to bottom.
    canned = (
        (
            ('database', 'connection', 'sql'),
            {
                "root_cause": "Database connection issue detected in the logs",
                "debugging_steps": [
                    "Check if database service is running",
                    "Verify database connection parameters",
                    "Test network connectivity to database server",
                    "Check database user permissions",
                ],
                "debug_commands": [
                    "sudo systemctl status postgresql",
                    "netstat -an | grep 5432",
                    "psql -U username -h host -d database",
                    "ping database-host",
                ],
                "useful_links": [
                    "https://www.postgresql.org/docs/current/runtime.html",
                    "https://dev.mysql.com/doc/refman/8.0/en/troubleshooting.html",
                ],
            },
        ),
        (
            ('memory', 'heap', 'oom'),
            {
                "root_cause": "Memory exhaustion or memory leak detected",
                "debugging_steps": [
                    "Monitor current memory usage",
                    "Check for memory leaks in application",
                    "Review JVM heap settings if Java application",
                    "Analyze memory dump if available",
                ],
                "debug_commands": [
                    "free -h",
                    "top -o %MEM",
                    "jstat -gc PID",
                    "ps aux --sort=-%mem | head",
                ],
                "useful_links": [
                    "https://docs.oracle.com/javase/8/docs/technotes/guides/troubleshoot/memleaks.html",
                    "https://linux.die.net/man/1/free",
                ],
            },
        ),
        (
            ('disk', 'space', 'full'),
            {
                "root_cause": "Disk space exhaustion causing system issues",
                "debugging_steps": [
                    "Check disk usage across all filesystems",
                    "Identify largest files and directories",
                    "Clean up temporary files and logs",
                    "Check for deleted files held by processes",
                ],
                "debug_commands": [
                    "df -h",
                    "du -sh /* | sort -hr",
                    "find /var/log -type f -size +100M",
                    "lsof +L1",
                ],
                "useful_links": [
                    "https://linux.die.net/man/1/df",
                    "https://www.cyberciti.biz/faq/linux-check-disk-space-command/",
                ],
            },
        ),
    )

    for keywords, analysis in canned:
        for keyword in keywords:
            if keyword in lowered:
                return analysis

    # No keyword group matched: generic system-level advice.
    return {
        "root_cause": f"System issue detected: {summary[:100]}...",
        "debugging_steps": [
            "Review complete error logs",
            "Check system resource usage",
            "Verify service configurations",
            "Test system connectivity",
        ],
        "debug_commands": [
            "systemctl --failed",
            "journalctl -p err -n 50",
            "htop",
            "netstat -tlnp",
        ],
        "useful_links": [
            "https://linux.die.net/man/1/systemctl",
            "https://www.freedesktop.org/software/systemd/man/journalctl.html",
        ],
    }

def log_processing_pipeline(raw_log, summarizer, model, tokenizer):
    """Summarize a raw log, then ask the causal LM for a JSON analysis.

    Step 1 runs *summarizer* on *raw_log*. If that raises, the function
    returns immediately with ``success`` False and the error recorded.

    Step 2 prompts *model* (up to two attempts) for a JSON object, extracts
    it with ``extract_json_simple`` and completes it with
    ``ensure_required_keys``. If no attempt yields parseable JSON, the result
    of ``create_fallback_analysis`` is used instead and ``success`` is still
    set to True, with a note appended to ``errors``.

    Args:
        raw_log: Log text to analyse.
        summarizer: Callable invoked as ``summarizer(text, **kwargs)`` and
            returning a list whose first item has a ``'summary_text'`` key.
        model: Causal LM exposing ``parameters()`` and ``generate()``.
        tokenizer: Tokenizer matching *model*.

    Returns:
        Dict with keys ``raw_log``, ``summary``, ``analysis``, ``success``
        and ``errors`` (a list of message strings).
    """
    results = {
        'raw_log': raw_log,
        'summary': None,
        'analysis': None,
        'success': False,
        'errors': []
    }
    # Step 1: Summarization
    try:
        # truncation=True: callers pass logs far longer than the summarizer's
        # input limit; without it over-long inputs fail instead of being
        # clipped to the model's maximum length.
        summary_result = summarizer(
            raw_log,
            max_length=350,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        summary_text = summary_result[0]['summary_text']
        results['summary'] = summary_text
    except Exception as e:
        results['errors'].append(f"Summarization failed: {e}")
        return results

    # Step 2: Analysis
    # The prompt depends only on the summary, so build it once for all attempts.
    prompt = f"""Analyze this log summary and respond with ONLY a JSON object:\n\nLog: {summary_text}\n\nRequired JSON format:\n{{\n  \"root_cause\": \"explain the main problem\",\n  \"debugging_steps\": [\"step 1\", \"step 2\", \"step 3\"],\n  \"debug_commands\": [\"command1\", \"command2\", \"command3\"],\n  \"useful_links\": [\"link1\", \"link2\"]\n}}\n\nJSON:"""
    success = False
    attempts = 0
    max_attempts = 2
    while not success and attempts < max_attempts:
        attempts += 1
        try:
            inputs = tokenizer(prompt, return_tensors="pt", max_length=800, truncation=True)
            device = next(model.parameters()).device
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=300,
                    temperature=0.2,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )
            # Decode only the newly generated tokens, not the echoed prompt.
            prompt_length = inputs['input_ids'].shape[1]
            response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
            json_str = extract_json_simple(response)
            if json_str:
                try:
                    parsed = json.loads(json_str)
                    results['analysis'] = ensure_required_keys(parsed, summary_text)
                    results['success'] = True
                    success = True
                except json.JSONDecodeError:
                    # Only the final attempt's failure is reported.
                    if attempts == max_attempts:
                        results['errors'].append(f"JSON parsing failed after {attempts} attempts")
            elif attempts == max_attempts:
                results['errors'].append("No valid JSON found in response")
        except Exception as e:
            if attempts == max_attempts:
                results['errors'].append(f"Generation failed: {e}")

    if not results['success']:
        # Keyword-based canned analysis so the caller always gets a result.
        results['analysis'] = create_fallback_analysis(summary_text)
        results['success'] = True
        results['errors'].append("Used fallback analysis due to model issues")
    return results

# --- Gradio Interface ---
def process_log_file(file_obj, summarizer, model, tokenizer):
    """Read an uploaded log file, run the pipeline, and format UI outputs.

    Args:
        file_obj: ``None``, a filesystem path (``str`` or path-like), or an
            object whose ``name`` attribute is the path.
        summarizer: Passed through to ``log_processing_pipeline``.
        model: Passed through to ``log_processing_pipeline``.
        tokenizer: Passed through to ``log_processing_pipeline``.

    Returns:
        Always a 7-tuple, one value per output component wired up in
        ``main()``: status, summary, root cause, debugging steps, debug
        commands, useful links, full JSON. On any failure only the status
        slot is filled and the rest are empty strings.
    """
    def failure(status):
        # Keep arity equal to the number of Gradio outputs on every path.
        return (status, "", "", "", "", "", "")

    def as_lines(value):
        # Render an analysis field as newline-separated text without
        # splitting a bare string per character or failing on non-str items.
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return '\n'.join(str(item) for item in value)
        return str(value)

    if file_obj is None:
        return failure("No file uploaded")
    try:
        # The File component in main() uses type="filepath" and so passes a
        # plain path string; file-like objects carrying the path in ``.name``
        # are still accepted.
        if isinstance(file_obj, str) or hasattr(file_obj, '__fspath__'):
            path = file_obj
        else:
            path = file_obj.name

        # latin-1 decodes any byte sequence, so it must come last or the
        # encodings after it can never be tried.
        encodings = ['utf-8', 'cp1252', 'latin-1']
        log_content = None
        for encoding in encodings:
            try:
                with open(path, 'r', encoding=encoding) as f:
                    log_content = f.read()
                break
            except UnicodeDecodeError:
                continue
        if log_content is None:
            return failure("Encoding error")
        if not log_content.strip():
            return failure("Empty file")
        if len(log_content) > 100000:
            log_content = log_content[:100000] + "\n... (file truncated)"

        results = log_processing_pipeline(log_content, summarizer, model, tokenizer)
        if not results['success']:
            return failure("Analysis failed")

        analysis = results['analysis']
        return (
            "Analysis complete",
            results['summary'],
            as_lines(analysis.get('root_cause', '')),
            as_lines(analysis.get('debugging_steps', [])),
            as_lines(analysis.get('debug_commands', [])),
            as_lines(analysis.get('useful_links', [])),
            json.dumps(results, indent=2)
        )
    except Exception as e:
        # UI boundary: report the problem in the status box instead of raising.
        return failure(f"Processing error: {str(e)}")

def main():
    """Load both models, assemble the Gradio Blocks UI, and launch it."""
    summarizer = load_summarizer()
    model, tokenizer = load_qwen()

    def run_analysis(uploaded):
        # Closure over the models loaded above; the button supplies only the file.
        return process_log_file(uploaded, summarizer, model, tokenizer)

    with gr.Blocks(title="Minimal LogLens") as app:
        gr.Markdown("# Minimal LogLens Log Analyzer")
        file_input = gr.File(
            label="Upload Log File",
            file_types=[".txt", ".log", ".out", ".err"],
            type="filepath",
        )
        analyze_btn = gr.Button("Analyze Log")

        # Read-only result fields, created in display order.
        status_box = gr.Textbox(label="Status", interactive=False)
        detail_boxes = [
            gr.Textbox(label=box_label, lines=box_lines, interactive=False)
            for box_label, box_lines in (
                ("Summary", 3),
                ("Root Cause", 2),
                ("Debugging Steps", 4),
                ("Debug Commands", 4),
                ("Useful Links", 2),
            )
        ]
        json_box = gr.Code(label="Full JSON Output", language="json", interactive=False)

        analyze_btn.click(
            fn=run_analysis,
            inputs=file_input,
            outputs=[status_box, *detail_boxes, json_box],
        )
    app.launch()

# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()