mgbam committed on
Commit
c0d8669
·
verified ·
1 Parent(s): a1abbf9

Update app/gemini_analyzer.py

Browse files
Files changed (1) hide show
  1. app/gemini_analyzer.py +41 -53
app/gemini_analyzer.py CHANGED
@@ -1,22 +1,16 @@
1
  """
2
- A sophisticated analyzer using the Google Gemini Pro API.
3
 
4
- This module provides structured analysis of financial text, including:
5
- - Nuanced sentiment with reasoning.
6
- - Key entity extraction (e.g., cryptocurrencies).
7
- - Topic classification.
8
- - Potential market impact assessment.
9
  """
10
  import os
11
  import logging
12
  import httpx
13
- import json
14
  from typing import Optional, TypedDict, List, Union
15
 
16
- # Configure logging
17
  logger = logging.getLogger(__name__)
18
 
19
- # --- Pydantic-like models for structured output ---
20
  class AnalysisResult(TypedDict):
21
  sentiment: str
22
  sentiment_score: float
@@ -27,82 +21,76 @@ class AnalysisResult(TypedDict):
27
  summary: str
28
  error: Optional[str]
29
 
30
-
31
  class GeminiAnalyzer:
32
- """Manages interaction with the Google Gemini API for deep text analysis."""
33
-
34
  API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent"
35
 
36
  def __init__(self, client: httpx.AsyncClient, api_key: Optional[str] = None):
37
  self.client = client
38
  self.api_key = api_key or os.getenv("GEMINI_API_KEY")
39
  if not self.api_key:
40
- raise ValueError("GEMINI_API_KEY is not set. Please add it as a repository secret.")
41
  self.params = {"key": self.api_key}
42
  self.headers = {"Content-Type": "application/json"}
43
 
44
  def _build_prompt(self, text: str) -> dict:
45
- """Creates the structured JSON prompt for the Gemini API."""
46
  return {
47
  "contents": [{
48
  "parts": [{
49
  "text": f"""
50
  Analyze the following financial text from the cryptocurrency world.
51
- Provide your analysis as a single, minified JSON object with NO markdown formatting.
52
-
53
- The JSON object must have these exact keys: "sentiment", "sentiment_score", "reason", "entities", "topic", "impact", "summary".
54
-
55
- - "sentiment": MUST be one of "POSITIVE", "NEGATIVE", or "NEUTRAL".
56
- - "sentiment_score": A float between -1.0 (very negative) and 1.0 (very positive).
57
- - "reason": A brief, one-sentence explanation for the sentiment score.
58
- - "entities": A JSON array of strings listing the primary cryptocurrencies or tokens mentioned (e.g., ["Bitcoin", "ETH"]).
59
- - "topic": MUST be one of "Regulation", "Partnership", "Technical Update", "Market Hype", "Security", or "General News".
60
- - "impact": Assess the potential short-term market impact. MUST be one of "LOW", "MEDIUM", or "HIGH".
61
- - "summary": A concise, one-sentence summary of the provided text.
62
-
63
- Text to analyze: "{text}"
64
  """
65
  }]
66
  }]
67
  }
68
 
69
- def _extract_json(self, text: str) -> Optional[dict]:
70
- """Finds and parses the first valid JSON object in a string."""
71
- try:
72
- # Find the first '{' and the last '}' to isolate the JSON blob
73
- start_index = text.find('{')
74
- end_index = text.rfind('}')
75
- if start_index != -1 and end_index != -1 and end_index > start_index:
76
- json_str = text[start_index:end_index+1]
77
- return json.loads(json_str)
78
- except json.JSONDecodeError as e:
79
- logger.error(f"Failed to decode JSON from extracted text: {text} | Error: {e}")
80
- return None
 
 
 
 
 
 
 
81
 
82
  async def analyze_text(self, text: str) -> AnalysisResult:
83
  """Sends text to Gemini and returns a structured analysis."""
84
  prompt = self._build_prompt(text)
85
  try:
86
- response = await self.client.post(
87
- self.API_URL, headers=self.headers, params=self.params, json=prompt, timeout=60.0
88
- )
89
  response.raise_for_status()
90
-
91
  full_response = response.json()
92
  response_text = full_response["candidates"][0]["content"]["parts"][0]["text"]
93
 
94
- # Use the new robust JSON extractor
95
- analysis = self._extract_json(response_text)
96
-
97
- if analysis:
98
- analysis["error"] = None
99
- return analysis
100
- else:
101
- # This will be logged if the helper function fails
102
- raise ValueError(f"Could not extract valid JSON from Gemini response: {response_text}")
103
 
104
  except Exception as e:
105
- logger.error(f"❌ Gemini Analysis Error: {e}")
106
  return {
107
  "sentiment": "ERROR", "sentiment_score": 0.0, "reason": str(e),
108
  "entities": [], "topic": "Unknown", "impact": "Unknown",
 
1
  """
2
+ A robust, resilient analyzer using the Google Gemini Pro API.
3
 
4
+ This module prompts Gemini for a simple key-value format and then constructs
5
+ the final JSON object in Python, making it resilient to LLM syntax errors.
 
 
 
6
  """
7
  import os
8
  import logging
9
  import httpx
 
10
  from typing import Optional, TypedDict, List, Union
11
 
 
12
  logger = logging.getLogger(__name__)
13
 
 
14
  class AnalysisResult(TypedDict):
15
  sentiment: str
16
  sentiment_score: float
 
21
  summary: str
22
  error: Optional[str]
23
 
 
24
  class GeminiAnalyzer:
 
 
25
  API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent"
26
 
27
  def __init__(self, client: httpx.AsyncClient, api_key: Optional[str] = None):
28
  self.client = client
29
  self.api_key = api_key or os.getenv("GEMINI_API_KEY")
30
  if not self.api_key:
31
+ raise ValueError("GEMINI_API_KEY is not set.")
32
  self.params = {"key": self.api_key}
33
  self.headers = {"Content-Type": "application/json"}
34
 
35
  def _build_prompt(self, text: str) -> dict:
36
+ """Creates a prompt asking for a simple, parsable text format."""
37
  return {
38
  "contents": [{
39
  "parts": [{
40
  "text": f"""
41
  Analyze the following financial text from the cryptocurrency world.
42
+ Respond using a simple key::value format, with each key-value pair on a new line. Do NOT use JSON.
43
+
44
+ KEYS:
45
+ sentiment:: [POSITIVE, NEGATIVE, or NEUTRAL]
46
+ sentiment_score:: [A float between -1.0 and 1.0]
47
+ reason:: [A brief, one-sentence explanation for the sentiment.]
48
+ entities:: [A comma-separated list of cryptocurrencies mentioned, e.g., Bitcoin, ETH]
49
+ topic:: [One of: Regulation, Partnership, Technical Update, Market Hype, Security, General News]
50
+ impact:: [One of: LOW, MEDIUM, HIGH]
51
+ summary:: [A concise, one-sentence summary of the text.]
52
+
53
+ TEXT TO ANALYZE: "{text}"
 
54
  """
55
  }]
56
  }]
57
  }
58
 
59
+ def _parse_structured_text(self, text: str) -> AnalysisResult:
60
+ """Parses the key::value text response from Gemini into a structured dict."""
61
+ data = {}
62
+ for line in text.splitlines():
63
+ if '::' in line:
64
+ key, value = line.split('::', 1)
65
+ data[key.strip()] = value.strip()
66
+
67
+ # Build the final, validated object
68
+ return {
69
+ "sentiment": data.get("sentiment", "NEUTRAL").upper(),
70
+ "sentiment_score": float(data.get("sentiment_score", 0.0)),
71
+ "reason": data.get("reason", "N/A"),
72
+ "entities": [e.strip() for e in data.get("entities", "").split(',') if e.strip()],
73
+ "topic": data.get("topic", "General News"),
74
+ "impact": data.get("impact", "LOW").upper(),
75
+ "summary": data.get("summary", "Summary not available."),
76
+ "error": None
77
+ }
78
 
79
  async def analyze_text(self, text: str) -> AnalysisResult:
80
  """Sends text to Gemini and returns a structured analysis."""
81
  prompt = self._build_prompt(text)
82
  try:
83
+ response = await self.client.post(self.API_URL, headers=self.headers, params=self.params, json=prompt, timeout=60.0)
 
 
84
  response.raise_for_status()
85
+
86
  full_response = response.json()
87
  response_text = full_response["candidates"][0]["content"]["parts"][0]["text"]
88
 
89
+ # Use our new, robust parser
90
+ return self._parse_structured_text(response_text)
 
 
 
 
 
 
 
91
 
92
  except Exception as e:
93
+ logger.error(f"❌ Gemini API or Parsing Error: {e}")
94
  return {
95
  "sentiment": "ERROR", "sentiment_score": 0.0, "reason": str(e),
96
  "entities": [], "topic": "Unknown", "impact": "Unknown",