# NOTE: The lines below are HTML-to-text extraction residue from the hosting
# page (HuggingFace Space status, file size, commit hash, line-number gutter),
# not program code. Preserved as a comment so the file parses:
# Spaces: Sleeping / Sleeping | File size: 3,152 Bytes | commit 64ffc8f
import streamlit as st
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
# ---------------- Model Setup ----------------
@st.cache_resource
def load_sentiment_model():
    """Build and cache the sentiment-analysis pipeline.

    Cached via st.cache_resource so the model is downloaded and loaded only
    once per Streamlit server process, not on every rerun.
    """
    repo = "LinkLinkWu/Stock_Analysis_Test_Ahamed"
    return pipeline(
        "sentiment-analysis",
        model=AutoModelForSequenceClassification.from_pretrained(repo),
        tokenizer=AutoTokenizer.from_pretrained(repo),
    )
@st.cache_resource
def load_ner_model():
    """Build and cache the named-entity-recognition pipeline.

    Uses aggregation_strategy="simple", the current spelling of the
    deprecated grouped_entities=True flag: sub-word tokens are merged into
    whole entities, each carrying an "entity_group" key (which
    extract_org_entities below relies on).
    """
    tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
    model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
    return pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
# Instantiate both cached pipelines once at import time; subsequent Streamlit
# reruns reuse the same objects thanks to st.cache_resource.
sentiment_pipeline = load_sentiment_model()
ner_pipeline = load_ner_model()
# ---------------- Helper Functions ----------------
def fetch_news(ticker):
    """Scrape up to 50 recent headlines for *ticker* from Finviz.

    Returns a list of {'title': ..., 'link': ...} dicts. On any failure
    (HTTP error, missing news table, network exception) an error is shown
    via st.error and an empty list is returned — callers never see a raise.
    """
    try:
        url = f"https://finviz.com/quote.ashx?t={ticker}"
        # Browser-like headers: Finviz blocks requests with a bare UA.
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'text/html',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://finviz.com/',
            'Connection': 'keep-alive',
        }
        # timeout keeps a stalled request from hanging the Streamlit worker
        # forever (requests has no default timeout).
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            st.error(f"Failed to fetch page for {ticker}: Status code {response.status_code}")
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        # Finviz serves a generic page (ticker absent from <title>) for
        # unknown symbols; distinct name avoids shadowing the headline
        # variable used in the loop below.
        page_title = soup.title.text if soup.title else ""
        if ticker not in page_title:
            st.error(f"Page for {ticker} not found or access denied.")
            return []
        news_table = soup.find(id='news-table')
        if news_table is None:
            st.error(f"News table not found for {ticker}. The website structure might have changed.")
            return []
        news = []
        # find_all is the modern bs4 name for the deprecated findAll alias.
        for row in news_table.find_all('tr')[:50]:
            a_tag = row.find('a')
            if a_tag:
                news.append({'title': a_tag.get_text(), 'link': a_tag['href']})
        return news
    except Exception as e:
        # Broad catch is deliberate: any scrape failure degrades to "no news"
        # in the UI rather than crashing the app.
        st.error(f"Failed to fetch news for {ticker}: {e}")
        return []
def analyze_sentiment(text):
    """Classify *text* with the cached sentiment pipeline.

    Returns "Positive" when the model emits the POSITIVE label, "Negative"
    for any other label, and "Unknown" when inference raises.
    """
    try:
        label = sentiment_pipeline(text)[0]['label']
        return "Positive" if label == 'POSITIVE' else "Negative"
    except Exception as e:
        st.error(f"Sentiment analysis failed: {e}")
        return "Unknown"
def extract_org_entities(text):
    """Return up to five distinct organization names found in *text*.

    Names are upper-cased with BERT sub-word markers ("##") stripped;
    insertion order is preserved and duplicates are skipped. Returns an
    empty list if the NER pipeline raises.
    """
    try:
        found = []
        for entity in ner_pipeline(text):
            if entity["entity_group"] != "ORG":
                continue
            name = entity["word"].replace("##", "").strip().upper()
            if name not in found:
                found.append(name)
            if len(found) >= 5:
                break
        return found
    except Exception as e:
        st.error(f"NER entity extraction failed: {e}")
        return []
|