""" A sophisticated analyzer using the Google Gemini Pro API. This module provides structured analysis of financial text, including: - Nuanced sentiment with reasoning. - Key entity extraction (e.g., cryptocurrencies). - Topic classification. - Potential market impact assessment. """ import os import logging import httpx import json from typing import Optional, TypedDict, List, Union # Configure logging logger = logging.getLogger(__name__) # --- Pydantic-like models for structured output --- class AnalysisResult(TypedDict): sentiment: str sentiment_score: float reason: str entities: List[str] topic: str impact: str summary: str error: Optional[str] class GeminiAnalyzer: """Manages interaction with the Google Gemini API for deep text analysis.""" API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent" def __init__(self, client: httpx.AsyncClient, api_key: Optional[str] = None): self.client = client self.api_key = api_key or os.getenv("GEMINI_API_KEY") if not self.api_key: raise ValueError("GEMINI_API_KEY is not set. Please add it as a repository secret.") self.params = {"key": self.api_key} self.headers = {"Content-Type": "application/json"} def _build_prompt(self, text: str) -> dict: """Creates the structured JSON prompt for the Gemini API.""" return { "contents": [{ "parts": [{ "text": f""" Analyze the following financial text from the cryptocurrency world. Provide your analysis as a single, minified JSON object with NO markdown formatting. The JSON object must have these exact keys: "sentiment", "sentiment_score", "reason", "entities", "topic", "impact", "summary". - "sentiment": MUST be one of "POSITIVE", "NEGATIVE", or "NEUTRAL". - "sentiment_score": A float between -1.0 (very negative) and 1.0 (very positive). - "reason": A brief, one-sentence explanation for the sentiment score. - "entities": A JSON array of strings listing the primary cryptocurrencies or tokens mentioned (e.g., ["Bitcoin", "ETH"]). - "topic": MUST be one of "Regulation", "Partnership", "Technical Update", "Market Hype", "Security", or "General News". - "impact": Assess the potential short-term market impact. MUST be one of "LOW", "MEDIUM", or "HIGH". - "summary": A concise, one-sentence summary of the provided text. Text to analyze: "{text}" """ }] }] } def _extract_json(self, text: str) -> Optional[dict]: """Finds and parses the first valid JSON object in a string.""" try: # Find the first '{' and the last '}' to isolate the JSON blob start_index = text.find('{') end_index = text.rfind('}') if start_index != -1 and end_index != -1 and end_index > start_index: json_str = text[start_index:end_index+1] return json.loads(json_str) except json.JSONDecodeError as e: logger.error(f"Failed to decode JSON from extracted text: {text} | Error: {e}") return None async def analyze_text(self, text: str) -> AnalysisResult: """Sends text to Gemini and returns a structured analysis.""" prompt = self._build_prompt(text) try: response = await self.client.post( self.API_URL, headers=self.headers, params=self.params, json=prompt, timeout=60.0 ) response.raise_for_status() full_response = response.json() response_text = full_response["candidates"][0]["content"]["parts"][0]["text"] # Use the new robust JSON extractor analysis = self._extract_json(response_text) if analysis: analysis["error"] = None return analysis else: # This will be logged if the helper function fails raise ValueError(f"Could not extract valid JSON from Gemini response: {response_text}") except Exception as e: logger.error(f"❌ Gemini Analysis Error: {e}") return { "sentiment": "ERROR", "sentiment_score": 0.0, "reason": str(e), "entities": [], "topic": "Unknown", "impact": "Unknown", "summary": "Failed to perform analysis.", "error": str(e) }