Spaces:

neonwatty
/

gradio-mcp-screenshotter

Sleeping

App Files Files Community

gradio-mcp-screenshotter / llm_analyzer.py

neonwatty

Upload 4 files

1837ea3 verified 2 months ago

raw

history blame contribute delete

7.87 kB

	import os
	import base64
	from openai import OpenAI
	from dotenv import load_dotenv
	from pydantic import BaseModel, Field
	from typing import List, Optional, Union, Literal
	import json

	# Run python-dotenv's loader first so that the environment lookup below can
	# see any variables it provides.
	load_dotenv()

	# Module-wide OpenAI client pointed at the Nebius endpoint. os.getenv yields
	# None when NEBIUS_API_KEY is not set in the environment.
	client = OpenAI(
	    api_key=os.getenv("NEBIUS_API_KEY"),
	    base_url="https://api.studio.nebius.com/v1/",
	)

	# Pydantic model for the nested "image_url" object of an image content part.
	# `url` is the image address; analyze_screenshots (later in this file) fills
	# the equivalent plain-dict field with a base64 "data:image/png" URL.
	class ImageUrl(BaseModel):
	url: str

	# Pydantic model for a message content part that carries an image.
	# `type` only accepts the literal string "image_url"; `image_url` is the
	# nested ImageUrl object holding the address.
	class ImageContent(BaseModel):
	type: Literal["image_url"]
	image_url: ImageUrl

	# Pydantic model for a message content part that carries plain text.
	# `type` only accepts the literal string "text"; `text` is the content itself.
	class TextContent(BaseModel):
	type: Literal["text"]
	text: str

	# Pydantic model for one chat message.
	# `role` is restricted to "system", "user" or "assistant".
	# `content` is either a plain string or a list mixing TextContent and
	# ImageContent parts.
	# NOTE(review): analyze_screenshots builds its messages as plain dicts of this
	# same shape and does not reference this model in the visible code - confirm
	# whether it is used elsewhere.
	class Message(BaseModel):
	role: Literal["system", "user", "assistant"]
	content: Union[str, List[Union[TextContent, ImageContent]]]

	# Structured result for a single screenshot, as built by parse_llm_response.
	# Both fields are required (declared with Field(...)):
	#   issues_found - flag parsed from the reply's "ISSUES_FOUND:" line
	#   details      - text parsed from the reply's "DETAILS:" line
	class LLMResponse(BaseModel):
	issues_found: bool = Field(..., description="Whether any styling issues were found")
	details: str = Field(..., description="Description of any issues found or confirmation of no issues")

	# Aggregate result across all screenshots, as built by parse_summary_response.
	# summary, overall_assessment and all_passed are required; common_issues
	# falls back to a fresh empty list when omitted (default_factory=list).
	# all_passed is not parsed from the model reply - the caller supplies it.
	class AnalysisSummary(BaseModel):
	summary: str = Field(..., description="Brief summary of findings across all screenshots")
	common_issues: List[str] = Field(default_factory=list, description="List of issues that appear in multiple screenshots")
	overall_assessment: str = Field(..., description="Overall assessment of the website's styling")
	all_passed: bool = Field(..., description="True if all screenshots passed, False if any failed")

	def parse_llm_response(text: str) -> LLMResponse:
	    """Convert the model's line-oriented reply into an ``LLMResponse``.

	    The reply is searched for the first line that starts with
	    ``ISSUES_FOUND:`` and the first line that starts with ``DETAILS:``.
	    Only the remainder of each matching line (after the first colon) is used.

	    Args:
	        text: Raw reply text from the model.

	    Returns:
	        The parsed result. If parsing fails for any reason (for example a
	        marker line is missing), the error is printed and a fallback with
	        ``issues_found=False`` and ``details="Error parsing response"`` is
	        returned instead of raising.
	    """
	    try:
	        rows = text.split('\n')

	        # Anything other than "true" (case-insensitive, surrounding
	        # whitespace ignored) is treated as False.
	        flag_row = next(filter(lambda row: row.startswith('ISSUES_FOUND:'), rows))
	        _, _, flag_text = flag_row.partition(':')
	        has_issues = flag_text.strip().lower() == 'true'

	        detail_row = next(filter(lambda row: row.startswith('DETAILS:'), rows))
	        _, _, detail_text = detail_row.partition(':')

	        return LLMResponse(issues_found=has_issues, details=detail_text.strip())
	    except Exception as err:
	        print(f"Error parsing LLM response: {str(err)}")
	        return LLMResponse(issues_found=False, details="Error parsing response")

	def parse_summary_response(text: str, all_passed: bool) -> AnalysisSummary:
	    """Convert the model's summary reply into an ``AnalysisSummary``.

	    The reply is searched for the first lines starting with ``SUMMARY:``,
	    ``COMMON_ISSUES:`` and ``OVERALL_ASSESSMENT:``. The common-issues value is
	    split on commas, with empty entries dropped.

	    Args:
	        text: Raw reply text from the model.
	        all_passed: Pass/fail flag computed by the caller; copied into the
	            result unchanged, including on the fallback path.

	    Returns:
	        The parsed summary. If parsing fails for any reason, the error is
	        printed and a placeholder summary with an empty ``common_issues``
	        list is returned instead of raising.
	    """
	    def _value_of(marker, rows):
	        # Text after the first colon on the first row starting with `marker`;
	        # raises StopIteration when no row matches.
	        row = next(candidate for candidate in rows if candidate.startswith(marker))
	        return row.partition(':')[2].strip()

	    try:
	        rows = text.split('\n')
	        headline = _value_of('SUMMARY:', rows)

	        trimmed = (chunk.strip() for chunk in _value_of('COMMON_ISSUES:', rows).split(','))
	        shared_issues = [piece for piece in trimmed if piece]

	        verdict = _value_of('OVERALL_ASSESSMENT:', rows)

	        return AnalysisSummary(
	            summary=headline,
	            common_issues=shared_issues,
	            overall_assessment=verdict,
	            all_passed=all_passed,
	        )
	    except Exception as err:
	        print(f"Error parsing summary response: {str(err)}")
	        return AnalysisSummary(
	            summary="Error parsing summary",
	            common_issues=[],
	            overall_assessment="Error parsing assessment",
	            all_passed=all_passed,
	        )

	def analyze_screenshots(screenshots: List[str]) -> str:
	    """Analyze screenshots for serious styling issues using the LLM.

	    Each screenshot is read from disk, base64-encoded and sent to the model in
	    its own request. The parsed per-screenshot results are then sent back to
	    the model in a second request that asks for an overall summary.

	    Args:
	        screenshots: Paths of the image files to analyze. Each file is opened
	            in binary mode and embedded as a ``data:image/png;base64`` URL.

	    Returns:
	        The per-screenshot analyses (as JSON) followed by a ``SUMMARY:``
	        section containing the JSON summary. The summary's ``all_passed`` is
	        True only when no screenshot reported issues. If any exception is
	        raised along the way, it is printed and the string
	        ``"Error: Could not analyze screenshots"`` is returned instead.
	    """
	    try:
	        print("\nAnalyzing screenshots for styling issues...")

	        # Prepare the prompt
	        prompt = """Please analyze these website screenshots for any serious styling issues.
	Focus only on identifying clear, objective styling problems such as:
	- Text that is completely unreadable
	- Elements that are severely misaligned
	- Content that is completely cut off
	- Major layout breaks
	- Critical accessibility issues

	Do not make subjective judgments about design preferences or potential improvements.
	Simply identify if there are any serious styling problems that would affect usability.

	Format your response as:
	ISSUES_FOUND: [true/false]
	DETAILS: [Brief description of any issues found, or "No serious styling issues found"]
	"""

	        # Sampling settings shared by the per-screenshot and summary requests.
	        request_options = {
	            "model": "google/gemma-3-27b-it",
	            "max_tokens": 512,
	            "temperature": 0.5,
	            "top_p": 0.9,
	            "extra_body": {"top_k": 50},
	        }

	        individual_analyses = []
	        issues_found_list = []

	        # Analyze each screenshot
	        for i, screenshot in enumerate(screenshots, 1):
	            print(f"\nAnalyzing screenshot {i} of {len(screenshots)}...")

	            print(f'INFO: Processing screenshot {i} --> {screenshot}')
	            with open(screenshot, 'rb') as img_file:
	                base64_image = base64.b64encode(img_file.read()).decode('utf-8')

	            # One system message with the instructions, one user message
	            # carrying a short text part plus the image as a data URL.
	            messages = [
	                {"role": "system", "content": prompt},
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "text", "text": f"Analyze screenshot {i}:"},
	                        {
	                            "type": "image_url",
	                            "image_url": {
	                                "url": f"data:image/png;base64,{base64_image}"
	                            },
	                        },
	                    ],
	                },
	            ]

	            response = client.chat.completions.create(messages=messages, **request_options)

	            # Parse the response
	            analysis = parse_llm_response(response.choices[0].message.content)
	            individual_analyses.append(f"Screenshot {i} Analysis:\n{analysis.model_dump_json(indent=2)}\n")
	            issues_found_list.append(analysis.issues_found)

	        # Joined outside the f-string: a backslash inside an f-string
	        # replacement field is a SyntaxError before Python 3.12.
	        joined_analyses = '\n'.join(individual_analyses)

	        # Generate summary of all analyses
	        summary_prompt = f"""Please provide a summary of the following screenshot analyses.
	Focus on identifying any patterns or common issues across the screenshots.

	Here are the individual analyses:
	{joined_analyses}

	Format your response as:
	SUMMARY: [Brief summary of findings across all screenshots]
	COMMON_ISSUES: [List any issues that appear in multiple screenshots]
	OVERALL_ASSESSMENT: [Overall assessment of the website's styling]
	"""

	        summary_messages = [
	            {"role": "system", "content": "You are a web design analysis assistant that provides clear summaries of styling issues."},
	            {"role": "user", "content": summary_prompt},
	        ]

	        summary_response = client.chat.completions.create(messages=summary_messages, **request_options)

	        # A run passes only when no screenshot reported issues. all() over
	        # the issues flags would be True exactly when every screenshot failed.
	        all_passed = not any(issues_found_list)
	        summary = parse_summary_response(summary_response.choices[0].message.content, all_passed)

	        # Combine individual analyses and summary
	        final_response = joined_analyses + "\n\nSUMMARY:\n" + summary.model_dump_json(indent=2)

	        print("Analysis complete!")
	        return final_response

	    except Exception as e:
	        print(f"Error analyzing screenshots: {str(e)}")
	        return "Error: Could not analyze screenshots"