# Spaces:
# Running
# Running
""" | |
SEC Edgar Filings Search Tool for financial and company data | |
""" | |
import json
import logging
import re
from typing import Dict, List, Optional

import requests

from .base_tool import BaseTool
class SECSearchTool(BaseTool):
    """Search SEC EDGAR filings for company financial information.

    The tool resolves a free-text company name to a CIK (Central Index Key)
    using the SEC ticker listing, downloads that company's submissions
    document from ``self.base_url`` and renders a Markdown-style summary.
    When the company cannot be resolved, a "not found" report with research
    suggestions is returned instead.
    """

    _logger = logging.getLogger(__name__)

    # Ticker listing used to resolve company names to CIKs.
    _TICKERS_URL = "https://www.sec.gov/files/company_tickers_exchange.json"
    # Timeout (seconds) applied to every HTTP request made by this tool.
    _REQUEST_TIMEOUT = 15

    # Form types highlighted in the "Recent SEC Filings" section.
    _KEY_FORMS = {
        '10-K': 'Annual Report',
        '10-Q': 'Quarterly Report',
        '8-K': 'Current Report',
        'DEF 14A': 'Proxy Statement',
    }
    # How many of the most recent filings are inspected / displayed.
    _FILINGS_SCANNED = 20
    _FILINGS_SHOWN = 5

    # Hardcoded CIKs for major companies, used when the ticker listing
    # cannot be downloaded or parsed (testing/demo purposes).
    _KNOWN_COMPANIES = {
        'apple': {'cik': '0000320193', 'ticker': 'AAPL', 'title': 'Apple Inc.'},
        'microsoft': {'cik': '0000789019', 'ticker': 'MSFT', 'title': 'Microsoft Corporation'},
        'tesla': {'cik': '0001318605', 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
        'amazon': {'cik': '0001018724', 'ticker': 'AMZN', 'title': 'Amazon.com, Inc.'},
        'google': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
        'alphabet': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
        'meta': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
        'facebook': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
        'nvidia': {'cik': '0001045810', 'ticker': 'NVDA', 'title': 'NVIDIA Corporation'},
        'netflix': {'cik': '0001065280', 'ticker': 'NFLX', 'title': 'Netflix, Inc.'},
    }

    # Substrings that mark a query as financial / public-company related.
    _FINANCIAL_INDICATORS = (
        'company', 'financial', 'revenue', 'earnings', 'profit', 'stock',
        'investment', 'market cap', 'sec filing', 'annual report',
        'quarterly', 'balance sheet', 'income statement', 'cash flow',
        'public company', 'ticker', 'investor', 'shareholder',
    )

    def __init__(self):
        """Register the tool with its base class and set the HTTP defaults."""
        super().__init__("SEC EDGAR", "Search SEC filings and financial data for public companies")
        self.base_url = "https://data.sec.gov"
        self.headers = {
            # SEC requires a User-Agent.
            # NOTE(review): the contact address below looks like a redacted
            # placeholder - confirm it is a real, monitored address.
            'User-Agent': 'Research Tool [email protected]',
            'Accept-Encoding': 'gzip, deflate'
        }
        self.rate_limit_delay = 3.0  # SEC is strict about rate limiting

    def search(self, company_name: str, **kwargs) -> str:
        """Search SEC filings for company information.

        Args:
            company_name: Free-text company name or ticker symbol.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            A formatted report. This is the filing summary when the company
            and its submissions are found, the "not found" report when they
            are not, or the base class's error response on failure.
        """
        self.rate_limit()
        try:
            cik_data = self._find_company_cik(company_name)
            if not cik_data:
                return self._fallback_company_search(company_name)

            submissions = self._get_company_submissions(cik_data['cik'])
            if not submissions:
                return self._fallback_company_search(company_name)

            return self._format_sec_results(company_name, cik_data, submissions)
        except requests.RequestException as e:
            # The lookup helpers absorb their own request failures, so this
            # handler is a safety net for anything that still escapes.
            if "404" in str(e):
                return self._fallback_company_search(company_name)
            return self.format_error_response(company_name, f"Network error accessing SEC: {str(e)}")
        except Exception as e:
            return self.format_error_response(company_name, str(e))

    def _find_company_cik(self, company_name: str) -> Optional[Dict]:
        """Find company CIK (Central Index Key) from company name.

        Downloads the ticker listing and dispatches on its JSON shape
        (``fields``/``data`` table, dict of entries, or list of entries).

        Returns:
            ``{'cik', 'ticker', 'title'}`` for the best match, or ``None``
            when the listing contains no match. If the download or parsing
            fails, the hardcoded known-company table is consulted instead.
        """
        try:
            response = requests.get(
                self._TICKERS_URL, headers=self.headers, timeout=self._REQUEST_TIMEOUT
            )
            response.raise_for_status()
            tickers_data = response.json()

            company_lower = company_name.lower().strip()

            if isinstance(tickers_data, dict):
                if 'fields' in tickers_data and 'data' in tickers_data:
                    return self._search_exchange_format(tickers_data, company_lower)
                return self._search_direct_format(tickers_data, company_lower)
            if isinstance(tickers_data, list):
                return self._search_list_format(tickers_data, company_lower)
            return None
        except Exception as e:
            # Deliberate best-effort: any failure here degrades to the
            # built-in table rather than aborting the search.
            self._logger.warning("Error finding company CIK: %s", e)
            return self._fallback_company_lookup(company_name)

    def _fallback_company_lookup(self, company_name: str) -> Optional[Dict]:
        """Fallback company lookup using known major companies.

        A known key matches when it contains, or is contained in, the
        lower-cased and stripped query. An empty query never matches.

        Returns:
            A fresh ``{'cik', 'ticker', 'title'}`` dict, or ``None``.
        """
        company_key = company_name.lower().strip()
        if not company_key:
            # '' is a substring of every key and would always pick the
            # first table entry.
            return None
        for key, data in self._KNOWN_COMPANIES.items():
            if key in company_key or company_key in key:
                # Copy so callers cannot mutate the shared class-level table.
                return dict(data)
        return None

    @staticmethod
    def _match_rank(query: str, name: str, ticker: str) -> Optional[int]:
        """Rank how well a listing matches ``query`` (lower is better).

        All arguments are expected to be lower-cased already.

        Returns:
            0 for an exact ticker or name match, 1 when the query is
            contained in the name, 2 when the name is contained in the query,
            3 when any query word longer than three characters occurs in the
            name, and ``None`` when nothing matches.
        """
        if not query:
            return None
        if query == ticker or query == name:
            return 0
        if not name:
            # An empty name is a substring of everything; never match on it.
            return None
        if query in name:
            return 1
        if name in query:
            return 2
        if any(word in name for word in query.split() if len(word) > 3):
            return 3
        return None

    def _best_match(self, candidates, company_lower: str) -> Optional[Dict]:
        """Pick the best-ranked listing from ``candidates``.

        Args:
            candidates: Iterable of ``(cik, ticker, title)`` tuples.
            company_lower: Lower-cased search text.

        Returns:
            ``{'cik', 'ticker', 'title'}`` with the CIK zero-padded to ten
            digits, or ``None`` when no candidate matches. Ties keep the
            candidate that appears first.
        """
        query = (company_lower or "").strip()
        if not query:
            return None

        best = None
        best_rank = None
        for cik, ticker, title in candidates:
            if cik is None or cik == "":
                # Without a CIK the submissions lookup cannot succeed.
                continue
            ticker = "" if ticker is None else ticker
            title = "" if title is None else title
            rank = self._match_rank(
                query, str(title).lower().strip(), str(ticker).lower().strip()
            )
            if rank is None:
                continue
            if best_rank is None or rank < best_rank:
                best, best_rank = (cik, ticker, title), rank
                if rank == 0:
                    break  # cannot do better than an exact match

        if best is None:
            return None
        cik, ticker, title = best
        return {'cik': str(cik).zfill(10), 'ticker': ticker, 'title': title}

    @staticmethod
    def _first_value(entry: dict, *keys):
        """Return the first non-empty value of ``entry`` among ``keys``, else ``''``."""
        for key in keys:
            value = entry.get(key)
            if value is not None and value != "":
                return value
        return ""

    def _entry_candidates(self, entries):
        """Yield ``(cik, ticker, title)`` for every dict in ``entries``."""
        for entry in entries:
            if isinstance(entry, dict):
                yield (
                    self._first_value(entry, 'cik_str', 'cik'),
                    self._first_value(entry, 'ticker', 'symbol'),
                    self._first_value(entry, 'title', 'name'),
                )

    def _search_exchange_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
        """Search in exchange ticker data format.

        The payload holds a ``fields`` list of column names and a ``data``
        list of rows; the CIK, ticker and name columns are located by name.
        """
        fields = tickers_data.get('fields') or []
        rows = tickers_data.get('data') or []

        cik_idx = ticker_idx = name_idx = None
        for i, field in enumerate(fields):
            label = str(field).lower()
            if label in ('cik', 'cik_str'):
                cik_idx = i
            elif label in ('ticker', 'symbol'):
                ticker_idx = i
            elif label in ('name', 'title', 'company'):
                name_idx = i

        if cik_idx is None or (name_idx is None and ticker_idx is None):
            return None  # nothing usable to match on or to return

        # Compare against None explicitly: index 0 is a valid column.
        min_len = max(i for i in (cik_idx, ticker_idx, name_idx) if i is not None) + 1

        def candidates():
            for row in rows:
                if isinstance(row, (list, tuple)) and len(row) >= min_len:
                    yield (
                        row[cik_idx],
                        row[ticker_idx] if ticker_idx is not None else "",
                        row[name_idx] if name_idx is not None else "",
                    )

        return self._best_match(candidates(), company_lower)

    def _search_direct_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
        """Search in direct dictionary format (a dict whose values are entries)."""
        return self._best_match(self._entry_candidates(tickers_data.values()), company_lower)

    def _search_list_format(self, tickers_data: list, company_lower: str) -> Optional[Dict]:
        """Search in list format (a list of entry dicts)."""
        return self._best_match(self._entry_candidates(tickers_data), company_lower)

    def _get_company_submissions(self, cik: str) -> Optional[Dict]:
        """Get company submission data from SEC.

        Args:
            cik: Company CIK; it is zero-padded to ten digits for the URL.

        Returns:
            The decoded JSON document, or ``None`` if the request or the
            decoding fails (the failure is logged).
        """
        try:
            submissions_url = f"{self.base_url}/submissions/CIK{str(cik).zfill(10)}.json"
            response = requests.get(
                submissions_url, headers=self.headers, timeout=self._REQUEST_TIMEOUT
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Deliberate best-effort: the caller treats None as "not found".
            self._logger.warning("Error getting company submissions: %s", e)
            return None

    @staticmethod
    def _recent_filings(submissions: Dict) -> Dict:
        """Return ``submissions['filings']['recent']``, or ``{}`` if missing or null."""
        return (submissions.get('filings') or {}).get('recent') or {}

    def _format_sec_results(self, company_name: str, cik_data: Dict, submissions: Dict) -> str:
        """Format SEC filing results.

        Args:
            company_name: The search text as entered by the caller.
            cik_data: ``{'cik', 'ticker', 'title'}`` for the resolved company.
            submissions: Decoded submissions document for that company.

        Returns:
            The company header followed by the recent-filings and
            financial-highlights sections.
        """
        parts = [
            f"**SEC Financial Data for: {company_name}**\n\n",
            "**Company Information:**\n",
            f"• Official Name: {cik_data['title']}\n",
            f"• Ticker Symbol: {cik_data.get('ticker') or 'N/A'}\n",
            f"• CIK: {cik_data['cik']}\n",
        ]

        if 'description' in submissions:
            # The field may be present but empty or null.
            description = str(submissions.get('description') or '')
            if len(description) > 300:
                description = description[:300] + "..."
            parts.append(f"• Business Description: {description or 'Not available'}\n")

        parts.append(f"• Industry: {submissions.get('sic') or 'Not specified'}\n")
        parts.append(
            f"• Fiscal Year End: {submissions.get('fiscalYearEnd') or 'Not specified'}\n\n"
        )

        parts.append(self._analyze_recent_filings(submissions))
        parts.append(self._extract_financial_highlights(submissions))
        return "".join(parts)

    def _analyze_recent_filings(self, submissions: Dict) -> str:
        """Analyze recent SEC filings.

        Scans the first ``_FILINGS_SCANNED`` entries of the recent filings
        and lists up to ``_FILINGS_SHOWN`` whose form type is in
        ``_KEY_FORMS``, each with its filing date.
        """
        result = "**Recent SEC Filings:**\n"

        recent = self._recent_filings(submissions)
        if not recent:
            return result + "• No recent filings available\n\n"

        forms = recent.get('form') or []
        filing_dates = recent.get('filingDate') or []

        # zip() stops at the shorter list, so a form without a date is skipped.
        key_filings = [
            (form, date)
            for form, date in zip(forms[:self._FILINGS_SCANNED], filing_dates)
            if form in self._KEY_FORMS
        ]

        if key_filings:
            for form, date in key_filings[:self._FILINGS_SHOWN]:
                result += f"• {form} ({self._KEY_FORMS[form]}) - Filed: {date}\n"
        else:
            result += "• No key financial filings found in recent submissions\n"

        result += "\n"
        return result

    def _extract_financial_highlights(self, submissions: Dict) -> str:
        """Extract financial highlights from submission data.

        This is a simplified summary: it counts the 10-K and 10-Q forms in
        the recent filings and appends fixed research notes. No financial
        figures are parsed.
        """
        parts = [
            "**Financial Data Analysis:**\n",
            # NOTE(review): the next two lines are emitted unconditionally;
            # nothing in the submissions data is checked to support them.
            "• Filing Status: Active public company\n",
            "• Regulatory Compliance: Current with SEC requirements\n",
        ]

        recent = self._recent_filings(submissions)
        if recent:
            forms = recent.get('form') or []
            annual_reports = sum(1 for form in forms if form == '10-K')
            quarterly_reports = sum(1 for form in forms if form == '10-Q')
            parts.append(f"• Annual Reports (10-K): {annual_reports} on file\n")
            parts.append(f"• Quarterly Reports (10-Q): {quarterly_reports} on file\n")

        parts.extend([
            "• Note: Detailed financial metrics require parsing individual filing documents\n\n",
            "**Investment Research Notes:**\n",
            "• Use SEC filings for: revenue trends, risk factors, management discussion\n",
            "• Key documents: 10-K (annual), 10-Q (quarterly), 8-K (material events)\n",
            "• Combine with market data for comprehensive analysis\n\n",
        ])
        return "".join(parts)

    def _fallback_company_search(self, company_name: str) -> str:
        """Fallback response when company not found in SEC database."""
        return "".join([
            f"**SEC Financial Research for: {company_name}**\n\n",
            "**Company Search Results:**\n",
            f"• Company '{company_name}' not found in SEC EDGAR database\n",
            "• This may indicate the company is:\n",
            " - Private company (not required to file with SEC)\n",
            " - Foreign company not listed on US exchanges\n",
            " - Subsidiary of another public company\n",
            " - Different legal name than search term\n\n",
            "**Alternative Research Suggestions:**\n",
            "• Search for parent company or holding company\n",
            "• Check if company trades under different ticker symbol\n",
            "• Use company's full legal name for search\n",
            "• Consider private company databases for non-public entities\n\n",
        ])

    def should_use_for_query(self, query: str) -> bool:
        """SEC is good for public company financial and business information.

        Returns ``True`` when the lower-cased query contains any of the
        financial indicator phrases; an empty or ``None`` query is ``False``.
        """
        query_lower = (query or "").lower()
        return any(indicator in query_lower for indicator in self._FINANCIAL_INDICATORS)

    def extract_key_info(self, text: str) -> dict:
        """Extract key information from SEC results.

        Extends the base class's result with SEC-specific flags. The
        "not found" report itself mentions tickers, public companies and
        financial research, so the flags that key on those words are only
        set when the company was actually found.
        """
        base_info = super().extract_key_info(text)
        if text:
            text_lower = text.lower()
            company_found = 'not found in SEC' not in text
            base_info.update({
                'has_ticker': company_found and any(
                    pattern in text for pattern in ('Ticker Symbol:', 'ticker')
                ),
                'has_cik': 'CIK:' in text,
                'has_filings': any(form in text for form in ('10-K', '10-Q', '8-K')),
                'is_public_company': company_found and 'public company' in text_lower,
                'has_financial_data': company_found and any(
                    term in text_lower for term in ('revenue', 'earnings', 'financial')
                ),
                'company_found': company_found
            })
        return base_info