gradio-mcp-screenshotter / llm_analyzer.py
neonwatty's picture
Upload 4 files
1837ea3 verified
import os
import base64
from openai import OpenAI
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from typing import List, Optional, Union, Literal
import json
# Load environment variables
load_dotenv()
# Initialize OpenAI client
client = OpenAI(
base_url="https://api.studio.nebius.com/v1/",
api_key=os.environ.get("NEBIUS_API_KEY")
)
class ImageUrl(BaseModel):
url: str
class ImageContent(BaseModel):
type: Literal["image_url"]
image_url: ImageUrl
class TextContent(BaseModel):
type: Literal["text"]
text: str
class Message(BaseModel):
role: Literal["system", "user", "assistant"]
content: Union[str, List[Union[TextContent, ImageContent]]]
class LLMResponse(BaseModel):
issues_found: bool = Field(..., description="Whether any styling issues were found")
details: str = Field(..., description="Description of any issues found or confirmation of no issues")
class AnalysisSummary(BaseModel):
summary: str = Field(..., description="Brief summary of findings across all screenshots")
common_issues: List[str] = Field(default_factory=list, description="List of issues that appear in multiple screenshots")
overall_assessment: str = Field(..., description="Overall assessment of the website's styling")
all_passed: bool = Field(..., description="True if all screenshots passed, False if any failed")
def parse_llm_response(text: str) -> LLMResponse:
"""Parse the LLM response text into a structured format."""
try:
# Extract the boolean value
issues_found_line = next(line for line in text.split('\n') if line.startswith('ISSUES_FOUND:'))
issues_found = issues_found_line.split(':', 1)[1].strip().lower() == 'true'
# Extract the details
details_line = next(line for line in text.split('\n') if line.startswith('DETAILS:'))
details = details_line.split(':', 1)[1].strip()
return LLMResponse(issues_found=issues_found, details=details)
except Exception as e:
print(f"Error parsing LLM response: {str(e)}")
return LLMResponse(issues_found=False, details="Error parsing response")
def parse_summary_response(text: str, all_passed: bool) -> AnalysisSummary:
"""Parse the summary response text into a structured format."""
try:
lines = text.split('\n')
summary = next(line.split(':', 1)[1].strip() for line in lines if line.startswith('SUMMARY:'))
common_issues_line = next(line for line in lines if line.startswith('COMMON_ISSUES:'))
common_issues = [issue.strip() for issue in common_issues_line.split(':', 1)[1].strip().split(',') if issue.strip()]
overall_line = next(line for line in lines if line.startswith('OVERALL_ASSESSMENT:'))
overall_assessment = overall_line.split(':', 1)[1].strip()
return AnalysisSummary(
summary=summary,
common_issues=common_issues,
overall_assessment=overall_assessment,
all_passed=all_passed
)
except Exception as e:
print(f"Error parsing summary response: {str(e)}")
return AnalysisSummary(
summary="Error parsing summary",
common_issues=[],
overall_assessment="Error parsing assessment",
all_passed=all_passed
)
def analyze_screenshots(screenshots: List[str]) -> str:
"""Analyze screenshots for styling issues using LLM."""
try:
print("\nAnalyzing screenshots for styling issues...")
# Prepare the prompt
prompt = """Please analyze these website screenshots for any serious styling issues.
Focus only on identifying clear, objective styling problems such as:
- Text that is completely unreadable
- Elements that are severely misaligned
- Content that is completely cut off
- Major layout breaks
- Critical accessibility issues
Do not make subjective judgments about design preferences or potential improvements.
Simply identify if there are any serious styling problems that would affect usability.
Format your response as:
ISSUES_FOUND: [true/false]
DETAILS: [Brief description of any issues found, or "No serious styling issues found"]
"""
individual_analyses = []
issues_found_list = []
# Analyze each screenshot
for i, screenshot in enumerate(screenshots, 1):
print(f"\nAnalyzing screenshot {i} of {len(screenshots)}...")
# Add screenshot to the messages
print(f'INFO: Processing screenshot {i} --> {screenshot}')
with open(screenshot, 'rb') as img_file:
base64_image = base64.b64encode(img_file.read()).decode('utf-8')
# Create message with image
messages = [
{"role": "system", "content": prompt},
{
"role": "user",
"content": [
{"type": "text", "text": f"Analyze screenshot {i}:"},
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64_image}"
}
}
]
}
]
# Make the API call for this screenshot
response = client.chat.completions.create(
model="google/gemma-3-27b-it",
max_tokens=512,
temperature=0.5,
top_p=0.9,
extra_body={
"top_k": 50
},
messages=messages
)
# Parse the response
analysis = parse_llm_response(response.choices[0].message.content)
individual_analyses.append(f"Screenshot {i} Analysis:\n{analysis.model_dump_json(indent=2)}\n")
issues_found_list.append(analysis.issues_found)
# Generate summary of all analyses
summary_prompt = f"""Please provide a summary of the following screenshot analyses.
Focus on identifying any patterns or common issues across the screenshots.
Here are the individual analyses:
{'\n'.join(individual_analyses)}
Format your response as:
SUMMARY: [Brief summary of findings across all screenshots]
COMMON_ISSUES: [List any issues that appear in multiple screenshots]
OVERALL_ASSESSMENT: [Overall assessment of the website's styling]
"""
summary_messages = [
{"role": "system", "content": "You are a web design analysis assistant that provides clear summaries of styling issues."},
{"role": "user", "content": summary_prompt}
]
summary_response = client.chat.completions.create(
model="google/gemma-3-27b-it",
max_tokens=512,
temperature=0.5,
top_p=0.9,
extra_body={
"top_k": 50
},
messages=summary_messages
)
# Parse the summary response
all_passed = all(issues_found_list)
summary = parse_summary_response(summary_response.choices[0].message.content, all_passed)
# Combine individual analyses and summary
final_response = "\n".join(individual_analyses) + "\n\nSUMMARY:\n" + summary.model_dump_json(indent=2)
print("Analysis complete!")
return final_response
except Exception as e:
print(f"Error analyzing screenshots: {str(e)}")
return "Error: Could not analyze screenshots"