import base64
import io
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional

try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.graphics.shapes import Drawing
    from reportlab.graphics.charts.piecharts import Pie
    from reportlab.graphics.charts.barcharts import VerticalBarChart
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False

try:
    import matplotlib
    matplotlib.use('Agg')  # select the non-interactive backend before pyplot is imported
    import matplotlib.pyplot as plt
    MATPLOTLIB_AVAILABLE = True
except ImportError:
    MATPLOTLIB_AVAILABLE = False

logger = logging.getLogger(__name__)

def generate_pdf_report(results: Dict[str, Any]) -> io.BytesIO:
    """Generate a comprehensive PDF report."""
    if not REPORTLAB_AVAILABLE:
        logger.error("ReportLab not available for PDF generation")
        return _generate_simple_pdf_fallback(results)

    try:
        buffer = io.BytesIO()

        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=18
        )

        styles = getSampleStyleSheet()

        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            spaceAfter=30,
            textColor=colors.HexColor('#2E86AB'),
            alignment=1  # centered
        )

        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading2'],
            fontSize=16,
            spaceAfter=12,
            spaceBefore=20,
            textColor=colors.HexColor('#2E86AB')
        )

        story = []

        # Title and report metadata
        story.append(Paragraph("Global Business News Intelligence Report", title_style))
        story.append(Spacer(1, 0.5 * inch))
        story.append(Paragraph(f"Analysis Target: {results.get('query', 'N/A')}", styles['Normal']))
        story.append(Paragraph(f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
        story.append(Paragraph(f"Total Articles Analyzed: {results.get('total_articles', 0)}", styles['Normal']))
        story.append(Paragraph(f"Processing Time: {results.get('processing_time', 0):.2f} seconds", styles['Normal']))
        story.append(Spacer(1, 0.3 * inch))

        # Executive summary
        story.append(Paragraph("Executive Summary", heading_style))
        story.append(Paragraph(_create_executive_summary(results), styles['Normal']))
        story.append(Spacer(1, 0.2 * inch))

        # Sentiment analysis
        story.append(Paragraph("Sentiment Analysis", heading_style))
        story.extend(_create_sentiment_section(results, styles))

        # Key stories
        story.append(Paragraph("Key Stories", heading_style))
        story.extend(_create_stories_section(results, styles))

        # Keywords (only if any were extracted)
        if results.get('keywords'):
            story.append(Paragraph("Key Topics and Themes", heading_style))
            story.extend(_create_keywords_section(results, styles))

        # Sources
        story.append(Paragraph("News Sources", heading_style))
        story.extend(_create_sources_section(results, styles))

        # Methodology
        story.append(Paragraph("Methodology", heading_style))
        story.append(Paragraph(_create_methodology_section(results), styles['Normal']))

        doc.build(story)
        buffer.seek(0)
        return buffer

    except Exception as e:
        logger.error(f"PDF generation failed: {str(e)}")
        return _generate_simple_pdf_fallback(results)

def _create_executive_summary(results: Dict[str, Any]) -> str:
    """Create the executive summary text."""
    try:
        query = results.get('query', 'the analyzed topic')
        total_articles = results.get('total_articles', 0)
        avg_sentiment = results.get('average_sentiment', 0)

        sentiment_label = "positive" if avg_sentiment > 0.1 else "negative" if avg_sentiment < -0.1 else "neutral"

        summary = f"This report analyzes {total_articles} news articles related to {query}. "
        summary += f"The overall sentiment analysis reveals a {sentiment_label} tone with an average sentiment score of {avg_sentiment:.3f}. "

        # Guard against division by zero when no articles were collected
        if total_articles:
            dist = results.get('sentiment_distribution', {})
            positive = dist.get('Positive', 0)
            negative = dist.get('Negative', 0)
            neutral = dist.get('Neutral', 0)

            summary += f"The analysis shows {positive} positive articles ({positive / total_articles * 100:.1f}%), "
            summary += f"{negative} negative articles ({negative / total_articles * 100:.1f}%), "
            summary += f"and {neutral} neutral articles ({neutral / total_articles * 100:.1f}%). "

        if avg_sentiment > 0.2:
            summary += "The predominantly positive coverage suggests favorable market conditions or public perception."
        elif avg_sentiment < -0.2:
            summary += "The predominantly negative coverage indicates concerns or challenges that may require attention."
        else:
            summary += "The balanced sentiment coverage suggests a mixed outlook, with both opportunities and challenges present."

        return summary

    except Exception as e:
        logger.error(f"Executive summary creation failed: {str(e)}")
        return "Analysis completed successfully with comprehensive sentiment evaluation across multiple news sources."

def _create_sentiment_section(results: Dict[str, Any], styles) -> List:
    """Create the sentiment analysis section."""
    story = []

    try:
        dist = results.get('sentiment_distribution', {})
        # Avoid division by zero if the article count is missing or zero
        total = results.get('total_articles') or 1

        sentiment_data = [['Sentiment', 'Count', 'Percentage']]
        for label in ('Positive', 'Negative', 'Neutral'):
            count = dist.get(label, 0)
            sentiment_data.append([label, str(count), f"{count / total * 100:.1f}%"])

        sentiment_table = Table(sentiment_data)
        sentiment_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2E86AB')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 12),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))

        story.append(sentiment_table)
        story.append(Spacer(1, 0.2 * inch))

        explanation = (
            "Sentiment analysis was performed using multiple models, including VADER, the "
            "Loughran-McDonald financial dictionary, and FinBERT. Scores range from -1.0 "
            "(most negative) to +1.0 (most positive); scores between -0.1 and +0.1 are "
            "considered neutral."
        )
        story.append(Paragraph(explanation, styles['Normal']))
        story.append(Spacer(1, 0.2 * inch))

    except Exception as e:
        logger.error(f"Sentiment section creation failed: {str(e)}")
        story.append(Paragraph("Sentiment analysis data unavailable.", styles['Normal']))

    return story

def _create_stories_section(results: Dict[str, Any], styles) -> List:
    """Create the top stories section."""
    story = []

    def _truncate(text: str, limit: int = 300) -> str:
        # Only append an ellipsis when the text was actually shortened
        return text[:limit] + '...' if len(text) > limit else text

    try:
        articles = results.get('articles', [])
        if not articles:
            story.append(Paragraph("No articles available for analysis.", styles['Normal']))
            return story

        # Most positive coverage
        sorted_articles = sorted(articles, key=lambda x: x.get('sentiment', {}).get('compound', 0), reverse=True)
        if sorted_articles[0].get('sentiment', {}).get('compound', 0) > 0.1:
            top_positive = sorted_articles[0]
            story.append(Paragraph("Most Positive Coverage:", styles['Heading3']))
            story.append(Paragraph(f"<b>Title:</b> {top_positive.get('title', 'N/A')}", styles['Normal']))
            story.append(Paragraph(f"<b>Source:</b> {top_positive.get('source', 'N/A')}", styles['Normal']))
            story.append(Paragraph(f"<b>Sentiment Score:</b> {top_positive.get('sentiment', {}).get('compound', 0):.3f}", styles['Normal']))
            if 'summary' in top_positive:
                story.append(Paragraph(f"<b>Summary:</b> {_truncate(top_positive['summary'])}", styles['Normal']))
            story.append(Spacer(1, 0.2 * inch))

        # Most negative coverage (the opposite end of the same sorted list)
        top_negative = sorted_articles[-1]
        if top_negative.get('sentiment', {}).get('compound', 0) < -0.1:
            story.append(Paragraph("Most Negative Coverage:", styles['Heading3']))
            story.append(Paragraph(f"<b>Title:</b> {top_negative.get('title', 'N/A')}", styles['Normal']))
            story.append(Paragraph(f"<b>Source:</b> {top_negative.get('source', 'N/A')}", styles['Normal']))
            story.append(Paragraph(f"<b>Sentiment Score:</b> {top_negative.get('sentiment', {}).get('compound', 0):.3f}", styles['Normal']))
            if 'summary' in top_negative:
                story.append(Paragraph(f"<b>Summary:</b> {_truncate(top_negative['summary'])}", styles['Normal']))
            story.append(Spacer(1, 0.2 * inch))

        # Most recent coverage
        recent_articles = [a for a in articles if a.get('date')]
        if recent_articles:
            recent_articles.sort(key=lambda x: x.get('date', ''), reverse=True)
            recent = recent_articles[0]
            story.append(Paragraph("Most Recent Coverage:", styles['Heading3']))
            story.append(Paragraph(f"<b>Title:</b> {recent.get('title', 'N/A')}", styles['Normal']))
            story.append(Paragraph(f"<b>Source:</b> {recent.get('source', 'N/A')}", styles['Normal']))
            story.append(Paragraph(f"<b>Date:</b> {recent.get('date', 'N/A')}", styles['Normal']))
            story.append(Paragraph(f"<b>Sentiment Score:</b> {recent.get('sentiment', {}).get('compound', 0):.3f}", styles['Normal']))

    except Exception as e:
        logger.error(f"Stories section creation failed: {str(e)}")
        story.append(Paragraph("Story analysis data unavailable.", styles['Normal']))

    return story

def _create_keywords_section(results: Dict[str, Any], styles) -> List:
    """Create the keywords section."""
    story = []

    try:
        keywords = results.get('keywords', [])[:15]

        if not keywords:
            story.append(Paragraph("No keywords extracted.", styles['Normal']))
            return story

        keyword_data = [['Keyword', 'Relevance Score', 'Category']]
        for kw in keywords:
            keyword_data.append([
                kw.get('keyword', 'N/A'),
                f"{kw.get('score', 0):.3f}",
                kw.get('relevance', 'medium').title()
            ])

        keyword_table = Table(keyword_data)
        keyword_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2E86AB')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))

        story.append(keyword_table)
        story.append(Spacer(1, 0.2 * inch))

        explanation = (
            "Keywords were extracted using the YAKE (Yet Another Keyword Extractor) algorithm, "
            "which identifies the most relevant terms and phrases based on statistical analysis "
            "of the text corpus."
        )
        story.append(Paragraph(explanation, styles['Normal']))

    except Exception as e:
        logger.error(f"Keywords section creation failed: {str(e)}")
        story.append(Paragraph("Keyword analysis data unavailable.", styles['Normal']))

    return story

def _create_sources_section(results: Dict[str, Any], styles) -> List:
    """Create the news sources section."""
    story = []

    try:
        articles = results.get('articles', [])

        if not articles:
            story.append(Paragraph("No source data available.", styles['Normal']))
            return story

        # Tally articles per source
        source_counts = {}
        for article in articles:
            source = article.get('source', 'Unknown')
            source_counts[source] = source_counts.get(source, 0) + 1

        source_data = [['News Source', 'Article Count', 'Percentage']]
        total_articles = len(articles)

        for source, count in sorted(source_counts.items(), key=lambda x: x[1], reverse=True):
            percentage = (count / total_articles) * 100
            source_data.append([source, str(count), f"{percentage:.1f}%"])

        sources_table = Table(source_data)
        sources_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2E86AB')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))

        story.append(sources_table)
        story.append(Spacer(1, 0.2 * inch))

        explanation = (
            f"Articles were collected from {len(source_counts)} different news sources, "
            "providing diverse perspectives on the analyzed topic. Source diversity helps "
            "ensure comprehensive coverage and reduces bias."
        )
        story.append(Paragraph(explanation, styles['Normal']))

    except Exception as e:
        logger.error(f"Sources section creation failed: {str(e)}")
        story.append(Paragraph("Source analysis data unavailable.", styles['Normal']))

    return story

def _create_methodology_section(results: Dict[str, Any]) -> str:
    """Create the methodology section text."""
    # ReportLab Paragraphs collapse plain newlines, so paragraph breaks use <br/> markup.
    methodology = "This analysis employed a comprehensive natural language processing pipeline:<br/><br/>"

    methodology += "1. <b>Data Collection:</b> News articles were scraped from multiple reliable sources using RSS feeds and web scraping techniques. "
    methodology += "Content was filtered for relevance and deduplicated to ensure quality.<br/><br/>"

    methodology += "2. <b>Sentiment Analysis:</b> Three complementary models were used: "
    methodology += "VADER (general sentiment), the Loughran-McDonald dictionary (financial sentiment), and FinBERT (financial domain-specific). "
    methodology += "Final scores represent a weighted combination of all models.<br/><br/>"

    methodology += "3. <b>Text Processing:</b> Articles were cleaned, summarized using transformer models, and analyzed for key themes. "
    methodology += "Keyword extraction employed the YAKE algorithm for statistical relevance.<br/><br/>"

    methodology += "4. <b>Quality Assurance:</b> All content was filtered for English language, minimum length requirements, and relevance to the query terms. "
    methodology += "Results were validated across multiple model outputs for consistency.<br/><br/>"

    if results.get('processing_time'):
        methodology += f"Total processing time: {results['processing_time']:.2f} seconds for {results.get('total_articles', 0)} articles."

    return methodology

def _generate_simple_pdf_fallback(results: Dict[str, Any]) -> io.BytesIO:
    """Generate a simple text-based PDF fallback."""
    try:
        from fpdf import FPDF

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font('Arial', 'B', 16)
        pdf.cell(40, 10, 'News Analysis Report')
        pdf.ln(20)

        pdf.set_font('Arial', '', 12)
        pdf.cell(40, 10, f"Query: {results.get('query', 'N/A')}")
        pdf.ln(10)
        pdf.cell(40, 10, f"Articles: {results.get('total_articles', 0)}")
        pdf.ln(10)
        pdf.cell(40, 10, f"Average Sentiment: {results.get('average_sentiment', 0):.3f}")
        pdf.ln(20)

        dist = results.get('sentiment_distribution', {})
        pdf.cell(40, 10, 'Sentiment Distribution:')
        pdf.ln(10)
        pdf.cell(40, 10, f"Positive: {dist.get('Positive', 0)}")
        pdf.ln(10)
        pdf.cell(40, 10, f"Negative: {dist.get('Negative', 0)}")
        pdf.ln(10)
        pdf.cell(40, 10, f"Neutral: {dist.get('Neutral', 0)}")

        buffer = io.BytesIO()
        # PyFPDF returns a latin-1 str from output(dest='S'), while fpdf2 returns
        # a bytes-like object directly; encode only when needed.
        pdf_output = pdf.output(dest='S')
        if isinstance(pdf_output, str):
            pdf_output = pdf_output.encode('latin1')
        buffer.write(bytes(pdf_output))
        buffer.seek(0)
        return buffer

    except Exception as e:
        logger.error(f"PDF fallback failed: {str(e)}")
        # Last resort: return a plain-text payload so callers always get a buffer
        buffer = io.BytesIO()
        buffer.write(b"PDF generation failed. Please check logs.")
        buffer.seek(0)
        return buffer

def create_chart_image(data: Dict, chart_type: str = 'pie') -> Optional[str]:
    """Create a base64-encoded chart image for PDF inclusion."""
    if not MATPLOTLIB_AVAILABLE:
        return None

    try:
        plt.figure(figsize=(6, 4))

        if chart_type == 'pie' and 'sentiment_distribution' in data:
            dist = data['sentiment_distribution']
            labels = ['Positive', 'Negative', 'Neutral']
            sizes = [dist.get('Positive', 0), dist.get('Negative', 0), dist.get('Neutral', 0)]
            # Named pie_colors to avoid shadowing reportlab.lib.colors
            pie_colors = ['#28a745', '#dc3545', '#6c757d']

            plt.pie(sizes, labels=labels, colors=pie_colors, autopct='%1.1f%%', startangle=90)
            plt.title('Sentiment Distribution')

        elif chart_type == 'bar' and 'articles' in data:
            articles = data['articles']
            sources = {}
            for article in articles:
                source = article.get('source', 'Unknown')
                sources[source] = sources.get(source, 0) + 1

            top_sources = dict(sorted(sources.items(), key=lambda x: x[1], reverse=True)[:10])

            plt.bar(range(len(top_sources)), list(top_sources.values()), color='#2E86AB')
            plt.xticks(range(len(top_sources)), list(top_sources.keys()), rotation=45, ha='right')
            plt.title('Articles by Source')
            plt.ylabel('Article Count')
            plt.tight_layout()

        else:
            # Unknown chart type or missing data: don't return an empty figure
            plt.close()
            return None

        buffer = io.BytesIO()
        plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
        buffer.seek(0)

        image_base64 = base64.b64encode(buffer.getvalue()).decode()
        plt.close()
        return image_base64

    except Exception as e:
        logger.error(f"Chart creation failed: {str(e)}")
        return None
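
# The PDF builder above does not currently embed the charts that
# create_chart_image() produces. The sketch below shows one way to bridge the
# two: decode the base64 PNG back into a BytesIO and wrap it in a ReportLab
# Image flowable that can be appended to the story. The helper name
# `_chart_flowable` and the flowable dimensions are illustrative assumptions,
# not part of the original module.
def _chart_flowable(data: Dict, chart_type: str = 'pie') -> Optional["Image"]:
    """Sketch: turn a create_chart_image() result into a PDF flowable."""
    if not REPORTLAB_AVAILABLE:
        return None
    encoded = create_chart_image(data, chart_type)
    if encoded is None:
        return None
    png = io.BytesIO(base64.b64decode(encoded))
    # 5 x 3.3 inches roughly preserves the 6:4 aspect ratio of the source figure
    return Image(png, width=5 * inch, height=3.3 * inch)
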
def generate_csv_report(results: Dict[str, Any]) -> str:
    """Generate a CSV report."""
    try:
        import csv

        output = io.StringIO()
        writer = csv.writer(output)

        # Header row
        writer.writerow([
            'Title', 'Source', 'URL', 'Date', 'Sentiment_Score', 'Sentiment_Label',
            'VADER_Score', 'LM_Score', 'FinBERT_Score', 'Summary'
        ])

        for article in results.get('articles', []):
            sentiment = article.get('sentiment', {})
            compound = sentiment.get('compound', 0)

            if compound > 0.1:
                label = 'Positive'
            elif compound < -0.1:
                label = 'Negative'
            else:
                label = 'Neutral'

            summary = article.get('summary', '')
            if len(summary) > 200:
                summary = summary[:200] + '...'

            writer.writerow([
                article.get('title', ''),
                article.get('source', ''),
                article.get('url', ''),
                article.get('date', ''),
                compound,
                label,
                sentiment.get('vader', ''),
                sentiment.get('loughran_mcdonald', ''),
                sentiment.get('finbert', ''),
                summary
            ])

        return output.getvalue()

    except Exception as e:
        logger.error(f"CSV generation failed: {str(e)}")
        return "Error generating CSV report"

def generate_json_report(results: Dict[str, Any]) -> str:
    """Generate a JSON report with formatted output."""
    # Imported outside the try block so the except handler can also use it
    import json

    try:
        report = {
            'metadata': {
                'report_generated': datetime.now().isoformat(),
                'query': results.get('query', ''),
                'total_articles': results.get('total_articles', 0),
                'processing_time_seconds': results.get('processing_time', 0),
                'languages': results.get('languages', ['English'])
            },
            'summary': {
                'average_sentiment': results.get('average_sentiment', 0),
                'sentiment_distribution': results.get('sentiment_distribution', {}),
                'top_sources': _get_top_sources(results),
                'date_range': results.get('summary', {}).get('date_range', {})
            },
            'articles': results.get('articles', []),
            'keywords': results.get('keywords', [])[:20],
            'analysis_methods': {
                'sentiment_models': ['VADER', 'Loughran-McDonald', 'FinBERT'],
                'summarization_model': 'DistilBART',
                'keyword_extraction': 'YAKE',
                'translation_models': ['Helsinki-NLP Opus-MT']
            }
        }

        return json.dumps(report, indent=2, default=str, ensure_ascii=False)

    except Exception as e:
        logger.error(f"JSON generation failed: {str(e)}")
        return json.dumps({'error': str(e)}, indent=2)

def _get_top_sources(results: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Get the top news sources from results."""
    try:
        articles = results.get('articles', [])
        if not articles:
            return []

        sources = {}
        for article in articles:
            source = article.get('source', 'Unknown')
            sources[source] = sources.get(source, 0) + 1

        source_list = [
            {'source': source, 'count': count, 'percentage': round((count / len(articles)) * 100, 1)}
            for source, count in sources.items()
        ]

        return sorted(source_list, key=lambda x: x['count'], reverse=True)[:10]

    except Exception as e:
        logger.error(f"Top sources calculation failed: {str(e)}")
        return []

def validate_report_data(results: Dict[str, Any]) -> bool:
    """Validate that results contain the data required for reporting."""
    required_keys = ['query', 'articles', 'total_articles']

    for key in required_keys:
        if key not in results:
            logger.error(f"Missing required key for reporting: {key}")
            return False

    if not isinstance(results['articles'], list) or len(results['articles']) == 0:
        logger.error("No articles available for reporting")
        return False

    return True

__all__ = [
    'generate_pdf_report',
    'generate_csv_report',
    'generate_json_report',
    'create_chart_image',
    'validate_report_data'
]
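
# Minimal smoke test with a synthetic results dict. The field values below are
# illustrative assumptions; real results come from the upstream analysis
# pipeline that feeds these report generators.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sample = {
        'query': 'example corp',  # hypothetical query
        'total_articles': 2,
        'processing_time': 1.23,
        'average_sentiment': 0.15,
        'sentiment_distribution': {'Positive': 1, 'Negative': 0, 'Neutral': 1},
        'articles': [
            {'title': 'Example rallies', 'source': 'Example Wire', 'url': '',
             'date': '2024-01-01', 'summary': 'Shares rose.',
             'sentiment': {'compound': 0.4, 'vader': 0.4,
                           'loughran_mcdonald': 0.2, 'finbert': 0.3}},
            {'title': 'Example steady', 'source': 'Example Times', 'url': '',
             'date': '2024-01-02', 'summary': 'No major change.',
             'sentiment': {'compound': 0.0, 'vader': 0.0,
                           'loughran_mcdonald': 0.0, 'finbert': 0.0}},
        ],
        'keywords': [{'keyword': 'example corp', 'score': 0.02, 'relevance': 'high'}],
    }
    if validate_report_data(sample):
        print(generate_csv_report(sample)[:200])
        print(generate_json_report(sample)[:200])
        # Falls back to the simple FPDF path when ReportLab is unavailable
        pdf_bytes = generate_pdf_report(sample).getvalue()
        print(f"PDF bytes generated: {len(pdf_bytes)}")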