Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ import urllib.parse
|
|
| 7 |
from datetime import datetime, timedelta
|
| 8 |
import re
|
| 9 |
import os
|
|
|
|
| 10 |
# List of user agents to rotate through
|
| 11 |
_useragent_list = [
|
| 12 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
@@ -138,6 +139,44 @@ def filter_relevant_content(text):
|
|
| 138 |
|
| 139 |
return filtered_text
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
def summarize_financial_news(query):
|
| 142 |
"""Search for financial news, extract relevant content, and summarize"""
|
| 143 |
search_results = google_search(query, num_results=3)
|
|
@@ -151,15 +190,29 @@ def summarize_financial_news(query):
|
|
| 151 |
if not all_filtered_text:
|
| 152 |
return "No relevant financial information found."
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
| 158 |
Provide a detailed, coherent summary focusing on financial implications and analysis."""
|
| 159 |
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
-
return
|
| 163 |
|
| 164 |
# Gradio Interface
|
| 165 |
iface = gr.Interface(
|
|
|
|
| 7 |
from datetime import datetime, timedelta
|
| 8 |
import re
|
| 9 |
import os
|
| 10 |
+
|
| 11 |
# List of user agents to rotate through
|
| 12 |
_useragent_list = [
|
| 13 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
|
|
| 139 |
|
| 140 |
return filtered_text
|
| 141 |
|
| 142 |
+
def chunk_text(text, max_chunk_size=1000, overlap=100):
    """Split *text* into sentence-aligned chunks padded with neighbouring context.

    The text is split into sentences after ``.``, ``!`` or ``?``. Sentences
    are accumulated into a chunk until either adding the next sentence would
    push the accumulated length past ``max_chunk_size``, or the next sentence
    contains one of the section keywords (plain substring match on the
    lower-cased sentence). In both cases the sentence starts a new chunk.

    After chunking, each chunk is prefixed with the last ``overlap``
    characters of the previous chunk and suffixed with the first ``overlap``
    characters of the next chunk (concatenated directly, no separator).

    Args:
        text: The text to split.
        max_chunk_size: Soft limit on the accumulated chunk length, in
            characters. A single sentence longer than this still becomes its
            own chunk, and the overlap padding is added on top of it.
        overlap: Number of characters borrowed from each neighbouring chunk.
            Zero or a negative value disables the padding.

    Returns:
        A list of non-empty chunk strings; an empty list when *text* holds
        no content.
    """
    # Keywords that might indicate the start of a new section.
    section_keywords = (
        "revenue", "income", "profit", "loss", "expenses",
        "outlook", "forecast", "quarter", "year",
    )

    # Split text into sentences.
    sentences = re.split(r'(?<=[.!?])\s+', text)

    chunks = []
    current_sentences = []
    # Accumulated length, counting one separator space per sentence.
    current_length = 0

    for sentence in sentences:
        exceeds_limit = current_length + len(sentence) > max_chunk_size
        lowered = sentence.lower()
        starts_section = any(keyword in lowered for keyword in section_keywords)

        if exceeds_limit or starts_section:
            # Close the running chunk, but never emit an empty one (this
            # happens when the very first sentence is already too long).
            finished = " ".join(current_sentences).strip()
            if finished:
                chunks.append(finished)
            current_sentences = [sentence]
            current_length = len(sentence) + 1
        else:
            current_sentences.append(sentence)
            current_length += len(sentence) + 1

    # Add the last chunk if it's not empty.
    remainder = " ".join(current_sentences).strip()
    if remainder:
        chunks.append(remainder)

    # With a non-positive overlap there is nothing to borrow. Guarding here
    # also avoids the ``s[-0:]`` pitfall, which yields the *whole* string.
    if overlap <= 0:
        return chunks

    # Add overlap from both neighbours.
    last_index = len(chunks) - 1
    overlapped_chunks = []
    for i, chunk in enumerate(chunks):
        prefix = chunks[i - 1][-overlap:] if i > 0 else ""
        suffix = chunks[i + 1][:overlap] if i < last_index else ""
        overlapped_chunks.append(prefix + chunk + suffix)

    return overlapped_chunks
|
| 179 |
+
|
| 180 |
def summarize_financial_news(query):
|
| 181 |
"""Search for financial news, extract relevant content, and summarize"""
|
| 182 |
search_results = google_search(query, num_results=3)
|
|
|
|
| 190 |
if not all_filtered_text:
|
| 191 |
return "No relevant financial information found."
|
| 192 |
|
| 193 |
+
# Chunk the filtered text
|
| 194 |
+
chunks = chunk_text(all_filtered_text, max_chunk_size=3000, overlap=200)
|
| 195 |
+
|
| 196 |
+
summaries = []
|
| 197 |
+
for chunk in chunks:
|
| 198 |
+
prompt = f"""You are a financial analyst. Summarize the following text from a financial perspective:
|
| 199 |
+
{chunk}
|
| 200 |
Provide a detailed, coherent summary focusing on financial implications and analysis."""
|
| 201 |
|
| 202 |
+
summary = query_llama({"inputs": prompt, "parameters": {"max_length": 1000}})
|
| 203 |
+
summaries.append(summary[0]['generated_text'])
|
| 204 |
+
|
| 205 |
+
# Combine summaries
|
| 206 |
+
combined_summary = "\n\n".join(summaries)
|
| 207 |
+
|
| 208 |
+
# Final summarization of combined summaries
|
| 209 |
+
final_prompt = f"""As a financial analyst, provide a coherent and comprehensive summary of the following financial information:
|
| 210 |
+
{combined_summary}
|
| 211 |
+
Focus on the most important financial implications and analysis."""
|
| 212 |
+
|
| 213 |
+
final_summary = query_llama({"inputs": final_prompt, "parameters": {"max_length": 2000}})
|
| 214 |
|
| 215 |
+
return final_summary[0]['generated_text']
|
| 216 |
|
| 217 |
# Gradio Interface
|
| 218 |
iface = gr.Interface(
|